diff --git a/eng/MSBuild/LegacySupport.props b/eng/MSBuild/LegacySupport.props
index 6b110acaaa1..7bda63a6607 100644
--- a/eng/MSBuild/LegacySupport.props
+++ b/eng/MSBuild/LegacySupport.props
@@ -74,4 +74,8 @@
+
+
+
+
diff --git a/src/LegacySupport/CollectionBuilder/CollectionBuilderAttribute.cs b/src/LegacySupport/CollectionBuilder/CollectionBuilderAttribute.cs
new file mode 100644
index 00000000000..569daa70dff
--- /dev/null
+++ b/src/LegacySupport/CollectionBuilder/CollectionBuilderAttribute.cs
@@ -0,0 +1,17 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace System.Runtime.CompilerServices;
+
+[AttributeUsage(AttributeTargets.Class | AttributeTargets.Struct | AttributeTargets.Interface)]
+internal sealed class CollectionBuilderAttribute : Attribute
+{
+ public CollectionBuilderAttribute(Type builderType, string methodName)
+ {
+ BuilderType = builderType;
+ MethodName = methodName;
+ }
+
+ public Type BuilderType { get; }
+ public string MethodName { get; }
+}
diff --git a/src/LegacySupport/CollectionBuilder/README.md b/src/LegacySupport/CollectionBuilder/README.md
new file mode 100644
index 00000000000..15e9274d433
--- /dev/null
+++ b/src/LegacySupport/CollectionBuilder/README.md
@@ -0,0 +1,7 @@
+To use this source in your project, add the following to your `.csproj` file:
+
+```xml
+
+ true
+
+```
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/README.md b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/README.md
index c21e2a299ad..580facd6294 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/README.md
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Console/README.md
@@ -5,6 +5,8 @@
* [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
* [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Retrieval, Equivalence and Groundedness.
* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Foundry Evaluation service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
+* [`Microsoft.Extensions.AI.Evaluation.NLP`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains a set of evaluators that implement common algorithms for evaluating machine translation and natural
+language processing tasks. Evaluators currently include BLEU score, with more planned.
* [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
* [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
* [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.
@@ -18,6 +20,7 @@ dotnet add package Microsoft.Extensions.AI.Evaluation
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality
dotnet add package Microsoft.Extensions.AI.Evaluation.Safety
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting
+dotnet add package Microsoft.Extensions.AI.Evaluation.NLP
```
Or directly in the C# project file:
@@ -28,6 +31,7 @@ Or directly in the C# project file:
+
```
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/BLEUEvaluator.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/BLEUEvaluator.cs
new file mode 100644
index 00000000000..8ce43d48e52
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/BLEUEvaluator.cs
@@ -0,0 +1,96 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI.Evaluation.NLP.Common;
+using Microsoft.Extensions.AI.Evaluation.Utilities;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP;
+
+///
+/// An that evaluates the quality of a response produced by an AI model by comparing
+/// it to a reference response using the BLEU (Bilingual Evaluation Understudy) algorithm. It is often used
+/// to evaluate the quality of machine translation or text generation tasks.
+///
+///
+///
+/// The computes the BLEU score of a response ("hypothesis") compared to a reference
+/// supplied via . The score is returned in a
+/// with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates a perfect match.
+/// By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher is
+/// passing and a score below 0.5 is failing.
+///
+///
+public sealed class BLEUEvaluator : IEvaluator
+{
+ ///
+ /// Gets the of the returned by
+ /// .
+ ///
+ public static string BLEUMetricName => "BLEU";
+
+ ///
+ public IReadOnlyCollection EvaluationMetricNames { get; } = [BLEUMetricName];
+
+ ///
+ public ValueTask EvaluateAsync(
+ IEnumerable messages,
+ ChatResponse modelResponse,
+ ChatConfiguration? chatConfiguration = null,
+ IEnumerable? additionalContext = null,
+ CancellationToken cancellationToken = default)
+ {
+ _ = Throw.IfNull(modelResponse);
+
+ var metric = new NumericMetric(BLEUMetricName);
+ var result = new EvaluationResult(metric);
+
+ if (string.IsNullOrWhiteSpace(modelResponse.Text))
+ {
+ metric.AddDiagnostics(
+ EvaluationDiagnostic.Error($"The {nameof(modelResponse)} supplied for evaluation was null or empty."));
+
+ return new ValueTask(result);
+ }
+
+ if (additionalContext?.OfType().FirstOrDefault()
+ is not BLEUEvaluatorContext context)
+ {
+ metric.AddDiagnostics(
+ EvaluationDiagnostic.Error(
+ $"A value of type '{nameof(BLEUEvaluatorContext)}' was not found in the '{nameof(additionalContext)}' collection."));
+
+ return new ValueTask(result);
+ }
+
+ if (context.References.Count is 0)
+ {
+ metric.AddDiagnostics(
+ EvaluationDiagnostic.Error(
+ $"Supplied '{nameof(BLEUEvaluatorContext)}' did not contain any '{nameof(BLEUEvaluatorContext.References)}'."));
+
+ return new ValueTask(result);
+ }
+
+ var (score, duration) = TimingHelper.ExecuteWithTiming(() =>
+ {
+ var references = context.References.Select(reference => SimpleWordTokenizer.WordTokenize(reference));
+ var hypothesis = SimpleWordTokenizer.WordTokenize(modelResponse.Text);
+ return BLEUAlgorithm.SentenceBLEU(references, hypothesis, BLEUAlgorithm.DefaultBLEUWeights, SmoothingFunction.Method4);
+ });
+
+ metric.Value = score;
+ string durationText = $"{duration.TotalSeconds.ToString("F2", CultureInfo.InvariantCulture)} s";
+ metric.AddOrUpdateMetadata(name: "evaluation-duration", value: durationText);
+ metric.AddOrUpdateContext(context);
+ metric.Interpretation = NLPScoreInterpretation.Interpret(metric);
+
+ return new ValueTask(result);
+ }
+
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/BLEUEvaluatorContext.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/BLEUEvaluatorContext.cs
new file mode 100644
index 00000000000..320b20e9116
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/BLEUEvaluatorContext.cs
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#pragma warning disable S3604
+// S3604: Member initializer values should not be redundant.
+// We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
+// constructor syntax.
+
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP;
+
+///
+/// Contextual information that the uses to compute the BLEU score for a response.
+///
+///
+/// measures the BLEU score of a response compared to a reference. BLEU (Bilingual Evaluation Understudy)
+/// is a metric used to evaluate the quality of machine-generated text.
+///
+public sealed class BLEUEvaluatorContext : EvaluationContext
+{
+ ///
+ /// Gets the unique that is used for
+ /// .
+ ///
+ public static string BLEUContextName => "BLEU Context";
+
+ ///
+ /// Gets the reference responses against which the provided model response will be scored.
+ ///
+ ///
+ /// The measures the degree to which the response being evaluated is similar to
+ /// the response supplied via . The metric will be reported as a BLEU score.
+ ///
+ public IReadOnlyList References { get; }
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ ///
+ /// The reference responses against which the response that is being evaluated is compared.
+ ///
+ public BLEUEvaluatorContext(params string[] references)
+ : this(references as IEnumerable)
+ {
+ }
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ ///
+ /// The reference responses against which the response that is being evaluated is compared.
+ ///
+ public BLEUEvaluatorContext(IEnumerable references)
+ : base(
+ name: BLEUContextName,
+ contents: [.. references.Select(c => new TextContent(c))])
+ {
+ References = [.. references];
+ }
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/BLEUAlgorithm.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/BLEUAlgorithm.cs
new file mode 100644
index 00000000000..c7420d0be7a
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/BLEUAlgorithm.cs
@@ -0,0 +1,197 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+///
+/// Helper methods for calculating the BLEU score.
+/// See BLEU on Wikipedia or
+/// NLTK implementation
+/// for more details.
+///
+internal static class BLEUAlgorithm
+{
+ internal static int ClosestRefLength(IEnumerable> references, int hypLength)
+ {
+ if (!references.Any())
+ {
+ return 0;
+ }
+
+ int closestRefLength = 0;
+ int smallestDiff = int.MaxValue;
+ foreach (var reference in references)
+ {
+ int refLength = reference.Count();
+ int diff = Math.Abs(refLength - hypLength);
+ if (diff < smallestDiff ||
+ (diff == smallestDiff && refLength < closestRefLength))
+ {
+ smallestDiff = diff;
+ closestRefLength = refLength;
+ }
+ }
+
+ return closestRefLength;
+ }
+
+ internal static double BrevityPenalty(int closestRefLength, int hypLength)
+ {
+ if (hypLength <= 0)
+ {
+ return 0.0;
+ }
+
+ if (closestRefLength <= 0 || hypLength > closestRefLength)
+ {
+ return 1.0;
+ }
+
+ return Math.Exp(1 - ((double)closestRefLength / hypLength));
+ }
+
+ internal static RationalNumber ModifiedPrecision(IEnumerable> references, IEnumerable hypothesis, int n = 1)
+ {
+ if (n <= 0)
+ {
+ Throw.ArgumentOutOfRangeException(nameof(n), $"`{nameof(n)}` must be greater than zero.");
+ }
+
+ if (!references.Any() || !hypothesis.Any())
+ {
+ return RationalNumber.Zero;
+ }
+
+ var hyp = hypothesis.CreateNGrams(n);
+ var hypCounts = new MatchCounter>(hyp);
+
+ Dictionary, int> maxCounts = [];
+
+ foreach (var rf in references)
+ {
+ IEnumerable> refGrams = rf.CreateNGrams(n);
+ var refCounts = new MatchCounter>(refGrams);
+
+ foreach (var ct in refCounts)
+ {
+ if (maxCounts.TryGetValue(ct.Key, out int val))
+ {
+ maxCounts[ct.Key] = Math.Max(val, ct.Value);
+ }
+ else
+ {
+ maxCounts[ct.Key] = ct.Value;
+ }
+ }
+ }
+
+ Dictionary, int> clippedCounts = [];
+ foreach (var h in hypCounts)
+ {
+ if (maxCounts.TryGetValue(h.Key, out var v))
+ {
+ clippedCounts[h.Key] = Math.Min(h.Value, v);
+ }
+ else
+ {
+ // If the hypothesis n-gram is not in any reference, it is clipped to 0.
+ clippedCounts[h.Key] = 0;
+ }
+ }
+
+ int numerator = clippedCounts.Values.Sum();
+ int denominator = Math.Max(1, hypCounts.Sum());
+
+ return new RationalNumber(numerator, denominator);
+ }
+
+ ///
+ /// Generate an n-sized array of equal weights that sum to 1.0.
+ ///
+ /// Number of weights to return.
+ /// Array of equal sized values that sum to 1.0.
+ internal static double[] EqualWeights(int n)
+ {
+ if (n <= 0)
+ {
+ Throw.ArgumentOutOfRangeException(nameof(n), $"'{nameof(n)}' must be greater than zero.");
+ }
+
+ double[] weights = new double[n];
+ for (int i = 0; i < n; i++)
+ {
+ weights[i] = 1.0 / n;
+ }
+
+ return weights;
+ }
+
+ internal static readonly double[] DefaultBLEUWeights = EqualWeights(4);
+
+ internal static double SentenceBLEU(IEnumerable> references, IEnumerable hypothesis,
+ double[]? weights = null, Func? smoothingFunction = null)
+ {
+ if (references == null || !references.Any())
+ {
+ Throw.ArgumentNullException(nameof(references), $"'{nameof(references)}' cannot be null or empty.");
+ }
+
+ if (hypothesis == null || !hypothesis.Any())
+ {
+ Throw.ArgumentNullException(nameof(hypothesis), $"'{nameof(hypothesis)}' cannot be null or empty.");
+ }
+
+ if (weights is null)
+ {
+ weights = DefaultBLEUWeights;
+ }
+
+ if (weights.Length == 0)
+ {
+ Throw.ArgumentNullException(nameof(weights), $"'{nameof(weights)}' cannot be empty.");
+ }
+
+ var precisionValues = new RationalNumber[weights.Length];
+ for (int i = 0; i < weights.Length; i++)
+ {
+ int n = i + 1;
+ RationalNumber prec = ModifiedPrecision(references, hypothesis, n);
+
+ if (i == 0 && prec.Numerator == 0)
+ {
+            // If the precision for unigrams (n == 1) is zero, then there can be no higher order matches and the BLEU score is zero.
+ return 0.0;
+ }
+
+ precisionValues[i] = prec;
+ }
+
+ int hypLen = hypothesis.Count();
+ int closestRefLength = ClosestRefLength(references, hypLen);
+ double brevityPenalty = BrevityPenalty(closestRefLength, hypLen);
+
+ if (smoothingFunction == null)
+ {
+ smoothingFunction = SmoothingFunction.Method0;
+ }
+
+ double[] smoothedValues = smoothingFunction(precisionValues, hypLen);
+
+ double score = 0.0;
+ for (int i = 0; i < weights.Length; i++)
+ {
+ if (smoothedValues[i] > 0)
+ {
+ score += weights[i] * Math.Log(smoothedValues[i]);
+ }
+ }
+
+ return brevityPenalty * Math.Exp(score);
+ }
+
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/MatchCounter.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/MatchCounter.cs
new file mode 100644
index 00000000000..bbca2252057
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/MatchCounter.cs
@@ -0,0 +1,61 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+[DebuggerDisplay("{ToDebugString(),nq}")]
+internal readonly struct MatchCounter : IEnumerable>
+ where T : IEquatable
+{
+ private readonly Dictionary _counts = [];
+
+ public readonly int Sum() => _counts.Values.Sum();
+
+ public MatchCounter()
+ {
+ }
+
+ public MatchCounter(IEnumerable items)
+ {
+ _ = Throw.IfNull(items, nameof(items));
+ AddRange(items);
+ }
+
+ public void Add(T item)
+ {
+ if (_counts.TryGetValue(item, out int currentCount))
+ {
+ _counts[item] = currentCount + 1;
+ }
+ else
+ {
+ _counts[item] = 1;
+ }
+ }
+
+ public void AddRange(IEnumerable items)
+ {
+ if (items == null)
+ {
+ return;
+ }
+
+ foreach (var item in items)
+ {
+ Add(item);
+ }
+ }
+
+ public string ToDebugString() => string.Concat(_counts.Select(v => $"{v.Key}: {v.Value}, "));
+
+ public IEnumerator> GetEnumerator() => _counts.GetEnumerator();
+
+ IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)_counts).GetEnumerator();
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NGram.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NGram.cs
new file mode 100644
index 00000000000..5fb66461faf
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NGram.cs
@@ -0,0 +1,55 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+[DebuggerDisplay("{ToDebugString(),nq}")]
+[CollectionBuilder(typeof(NGramExtensions), nameof(NGramExtensions.CreateNGram))]
+internal readonly struct NGram : IEquatable>, IEnumerable
+ where T : IEquatable
+{
+ public NGram(ReadOnlySpan values)
+ : this(values.ToArray())
+ {
+ }
+
+ public NGram(params T[] values)
+ {
+ Values = Throw.IfNull(values, nameof(values));
+ _ = Throw.IfLessThan(values.Length, 1, nameof(values));
+ }
+
+ public readonly T[] Values { get; }
+
+ public int Length => Values.Length;
+
+ public bool Equals(NGram other)
+ => Values.SequenceEqual(other.Values);
+
+ public override bool Equals(object? obj) => obj is NGram other && Equals(other);
+
+ public override int GetHashCode()
+ {
+ int hashCode = 0;
+ foreach (var value in Values)
+ {
+ hashCode = HashCode.Combine(hashCode, value.GetHashCode());
+ }
+
+ return hashCode;
+ }
+
+ public IEnumerator GetEnumerator() => ((IEnumerable)Values).GetEnumerator();
+
+ IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+
+ public string ToDebugString() => $"[{string.Join(",", Values.Select(v => v.ToString()))}]";
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NGramExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NGramExtensions.cs
new file mode 100644
index 00000000000..149d3820328
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NGramExtensions.cs
@@ -0,0 +1,41 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Shared.Diagnostics;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+internal static class NGramExtensions
+{
+ // Collection builder method.
+ public static NGram CreateNGram(this ReadOnlySpan values)
+ where T : IEquatable => new(values);
+
+ ///
+ /// Create a sequence of n-grams from the input sequence.
+ ///
+ /// The input sequence of items.
+ /// The size of each n-gram.
+ internal static IEnumerable> CreateNGrams(this IEnumerable input, int n)
+ where T : IEquatable
+ {
+ if (n <= 0)
+ {
+ Throw.ArgumentOutOfRangeException(nameof(n), $"'{nameof(n)}' must be greater than zero.");
+ }
+
+ T[] output = [.. input.Take(n)];
+
+ while (output.Length == n)
+ {
+ yield return new NGram(output);
+
+ input = input.Skip(1);
+ output = [.. input.Take(n)];
+ }
+ }
+
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NLPScoreInterpretation.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NLPScoreInterpretation.cs
new file mode 100644
index 00000000000..4ef1d08b468
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/NLPScoreInterpretation.cs
@@ -0,0 +1,36 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+internal static class NLPScoreInterpretation
+{
+ internal static EvaluationMetricInterpretation Interpret(NumericMetric metric)
+ {
+ // Many NLP scores range from 0.0 to 1.0, where:
+ // - 0.0 means no match at all,
+ // - 1.0 means a perfect match.
+ // 0.5 is considered the minimum passing score for evaluation.
+
+ EvaluationRating rating = metric.Value switch
+ {
+ null => EvaluationRating.Inconclusive,
+ > 1.0 => EvaluationRating.Inconclusive,
+ > 0.8 and <= 1.0 => EvaluationRating.Exceptional,
+ > 0.6 and <= 0.8 => EvaluationRating.Good,
+ > 0.4 and <= 0.6 => EvaluationRating.Average,
+ > 0.2 and <= 0.4 => EvaluationRating.Poor,
+ >= 0.0 and <= 0.2 => EvaluationRating.Unacceptable,
+ < 0.0 => EvaluationRating.Inconclusive,
+ _ => EvaluationRating.Inconclusive,
+ };
+
+ const double MinimumPassingScore = 0.5;
+ return metric.Value is double value && value < MinimumPassingScore
+ ? new EvaluationMetricInterpretation(
+ rating,
+ failed: true,
+ reason: $"{metric.Name} is less than {MinimumPassingScore}.")
+ : new EvaluationMetricInterpretation(rating);
+ }
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/RationalNumber.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/RationalNumber.cs
new file mode 100644
index 00000000000..500b042b17b
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/RationalNumber.cs
@@ -0,0 +1,39 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Diagnostics;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+[DebuggerDisplay("{ToDebugString(),nq}")]
+internal readonly struct RationalNumber : IEquatable
+{
+ public static readonly RationalNumber Zero = new(0, 1);
+
+ public RationalNumber(int numerator, int denominator)
+ {
+ if (denominator == 0)
+ {
+ throw new DivideByZeroException("Denominator cannot be zero.");
+ }
+
+ Numerator = numerator;
+ Denominator = denominator;
+ }
+
+ public int Numerator { get; }
+ public int Denominator { get; }
+
+ public double ToDouble() => (double)Numerator / Denominator;
+
+ public string ToDebugString() => $"{Numerator}/{Denominator}";
+
+ public bool Equals(RationalNumber other)
+ => other.Numerator == Numerator && other.Denominator == Denominator;
+
+ public override bool Equals(object? obj) => obj is RationalNumber other && Equals(other);
+
+ public override int GetHashCode()
+ => HashCode.Combine(Numerator, Denominator);
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/SimpleWordTokenizer.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/SimpleWordTokenizer.cs
new file mode 100644
index 00000000000..4f4717852bd
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/SimpleWordTokenizer.cs
@@ -0,0 +1,217 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Microsoft.Shared.Diagnostics;
+
+#pragma warning disable S109 // Magic numbers should not be used
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+///
+/// Tokenizes a string into segments using the common rules established by the NLTK word tokenizer.
+///
+internal static class SimpleWordTokenizer
+{
+ ///
+ /// Tokenizes the input text into individual words based on specific rules for text normalization and segmentation.
+ ///
+ /// This method applies text normalization steps, such as removing skipped markers, handling line
+ /// breaks, and replacing common HTML entities. It also ensures consistent tokenization by inserting spaces around
+ /// punctuation, symbols, and certain character patterns. The tokenization rules are inspired by common BLEU algorithms,
+ /// such as those used in NLTK, SacreBLEU, and MOSES.
+ /// The input text to be tokenized. Cannot be .
+ /// An enumerable collection of strings, where each string represents a tokenized word. The collection will be empty
+ /// if the input text contains no valid tokens.
+ public static IEnumerable WordTokenize(string text)
+ {
+ _ = Throw.IfNull(text, nameof(text));
+
+ return WordTokenize(text.AsMemory());
+ }
+
+ ///
+ /// Tokenizes the input text into individual words based on specific rules for text normalization and segmentation.
+ ///
+ /// This method applies text normalization steps, such as removing skipped markers, handling line
+ /// breaks, and replacing common HTML entities. It also ensures consistent tokenization by inserting spaces around
+ /// punctuation, symbols, and certain character patterns. The tokenization rules are inspired by common BLEU algorithms,
+ /// such as those used in NLTK, SacreBLEU, and MOSES.
+ /// The input text to be tokenized. Cannot be .
+ /// An enumerable collection of strings, where each string represents a tokenized word. The collection will be empty
+ /// if the input text contains no valid tokens.
+ public static IEnumerable WordTokenize(ReadOnlyMemory text)
+ {
+ StringBuilder sb = new StringBuilder();
+
+ while (true)
+ {
+ if (text.IsEmpty)
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ yield break;
+ }
+
+ var span = text.Span;
+ char nextChar = span[0];
+
+ // Skip whitespace as separator
+ if (char.IsWhiteSpace(nextChar))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ text = text.Slice(1);
+ continue;
+ }
+
+ // Join hyphenated words
+ if (span[0] == '-' &&
+ span.Length > 1 &&
+ span[1] == '\n')
+ {
+ text = text.Slice(2);
+ continue;
+ }
+
+ if (span[0] == '-' &&
+ span.Length > 2 &&
+ span[1] == '\r' &&
+ span[2] == '\n')
+ {
+ text = text.Slice(3);
+ continue;
+ }
+
+ // Translate HTML entities
+ if (nextChar == '&')
+ {
+ if (span.StartsWith(""".AsSpan()))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ text = text.Slice(""".Length);
+ yield return "\"";
+ continue;
+ }
+ else if (span.StartsWith("&".AsSpan()))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ text = text.Slice("&".Length);
+ yield return "&";
+ continue;
+ }
+ else if (span.StartsWith("<".AsSpan()))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ text = text.Slice("<".Length);
+ yield return "<";
+ continue;
+ }
+ else if (span.StartsWith(">".AsSpan()))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ text = text.Slice(">".Length);
+ yield return ">";
+ continue;
+ }
+ else if (span.StartsWith("'".AsSpan()))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ text = text.Slice("'".Length);
+ yield return "'";
+ continue;
+ }
+ }
+
+ // Each symbol is a separate token
+ if (char.IsSymbol(nextChar))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ yield return nextChar.ToString();
+ text = text.Slice(1);
+ continue;
+ }
+
+ // Return punctuation
+ if (char.IsPunctuation(nextChar))
+ {
+ if (sb.Length > 0)
+ {
+ yield return sb.ToString();
+ _ = sb.Clear();
+ }
+
+ yield return nextChar.ToString();
+ text = text.Slice(1);
+ continue;
+ }
+
+ // if we have a number, consume it along with any internal punctuation
+ if (char.IsNumber(nextChar))
+ {
+            // If we are already building a token, append the digits to it
+            // rather than emitting them as a separate number token.
+ if (sb.Length > 0)
+ {
+ _ = sb.Append(nextChar);
+ text = text.Slice(1);
+ continue;
+ }
+
+ while (!text.IsEmpty && (char.IsNumber(text.Span[0]) || char.IsPunctuation(text.Span[0])))
+ {
+ _ = sb.Append(text.Span[0]);
+ text = text.Slice(1);
+ }
+
+ yield return sb.ToString();
+ _ = sb.Clear();
+ continue;
+ }
+
+ _ = sb.Append(char.ToUpperInvariant(nextChar));
+ text = text.Slice(1);
+ }
+
+ }
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/SmoothingFunction.cs b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/SmoothingFunction.cs
new file mode 100644
index 00000000000..0e3071f6bdd
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Common/SmoothingFunction.cs
@@ -0,0 +1,74 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Common;
+
+///
+/// Implementations of smoothing functions for BLEU scores taken from
+/// `A Systematic Comparison of Smoothing Techniques for Sentence-Level BLEU`
+/// by Chen and Cherry. http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf.
+///
+internal static class SmoothingFunction
+{
+ ///
+ /// This is the baseline method, which does not apply any smoothing.
+ ///
+ /// N precision values to be smoothed.
+ /// Number of tokens in the hypothesis.
+ /// Smoothed precision values.
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE0060:Remove unused parameter", Justification = "Matches expected signature of SmoothingFunction")]
+ internal static double[] Method0(RationalNumber[] precisions, int hypLen)
+ {
+ double[] smoothed = new double[precisions.Length];
+ for (int i = 0; i < precisions.Length; i++)
+ {
+ if (precisions[i].Numerator == 0)
+ {
+ smoothed[i] = double.Epsilon;
+ }
+ else
+ {
+ smoothed[i] = precisions[i].ToDouble();
+ }
+ }
+
+ return smoothed;
+ }
+
+ ///
+ /// Smoothing method 4:
+ /// Shorter translations may have inflated precision values due to having
+ /// smaller denominators; therefore, we give them proportionally
+ /// smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
+    /// suggest dividing by 1/ln(len(T)), where T is the length of the translation.
+ ///
+ /// N precision values to be smoothed.
+ /// Number of tokens in the hypothesis.
+ /// Smoothed precision values.
+ internal static double[] Method4(RationalNumber[] precisions, int hypLen)
+ {
+ const double DefaultK = 5.0;
+
+ double[] smoothed = new double[precisions.Length];
+
+ int inc = 1;
+ for (int i = 0; i < precisions.Length; i++)
+ {
+ RationalNumber p = precisions[i];
+ if (p.Numerator == 0 && hypLen > 1)
+ {
+ double numerator = 1 / (Math.Pow(2.0, inc) * DefaultK / Math.Log(hypLen));
+ smoothed[i] = numerator / p.Denominator;
+ inc++;
+ }
+ else
+ {
+ smoothed[i] = p.ToDouble();
+ }
+ }
+
+ return smoothed;
+ }
+}
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Microsoft.Extensions.AI.Evaluation.NLP.csproj b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Microsoft.Extensions.AI.Evaluation.NLP.csproj
new file mode 100644
index 00000000000..0bab1cf7fb0
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/Microsoft.Extensions.AI.Evaluation.NLP.csproj
@@ -0,0 +1,39 @@
+
+
+
+    A library that contains a set of evaluators that implement commonly used algorithmic evaluation metrics for natural language processing.
+ $(TargetFrameworks);netstandard2.0
+ Microsoft.Extensions.AI.Evaluation.NLP
+
+
+
+ AIEval
+ preview
+ true
+ false
+ 0
+ 0
+
+
+
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/README.md b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/README.md
new file mode 100644
index 00000000000..580facd6294
--- /dev/null
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.NLP/README.md
@@ -0,0 +1,53 @@
+# The Microsoft.Extensions.AI.Evaluation libraries
+
+`Microsoft.Extensions.AI.Evaluation` is a set of .NET libraries defined in the following NuGet packages that have been designed to work together to support building processes for evaluating the quality of AI software.
+
+* [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
+* [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Retrieval, Equivalence and Groundedness.
+* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Foundry Evaluation service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
+* [`Microsoft.Extensions.AI.Evaluation.NLP`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains a set of evaluators that implement common algorithms for evaluating machine translation and natural
+language processing tasks. Evaluators currently include BLEU score, with more planned.
+* [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
+* [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
+* [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.
+
+## Install the packages
+
+From the command-line:
+
+```console
+dotnet add package Microsoft.Extensions.AI.Evaluation
+dotnet add package Microsoft.Extensions.AI.Evaluation.Quality
+dotnet add package Microsoft.Extensions.AI.Evaluation.Safety
+dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting
+dotnet add package Microsoft.Extensions.AI.Evaluation.NLP
+```
+
+Or directly in the C# project file:
+
+```xml
+<ItemGroup>
+  <PackageReference Include="Microsoft.Extensions.AI.Evaluation" />
+  <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Quality" />
+  <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Safety" />
+  <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Reporting" />
+  <PackageReference Include="Microsoft.Extensions.AI.Evaluation.NLP" />
+</ItemGroup>
+```
+
+You can optionally add the `Microsoft.Extensions.AI.Evaluation.Reporting.Azure` package in either of these places if you need Azure Storage support.
+
+## Install the command line tool
+
+```console
+dotnet tool install Microsoft.Extensions.AI.Evaluation.Console --create-manifest-if-needed
+```
+
+## Usage Examples
+
+For a comprehensive tour of all the functionality, concepts and APIs available in the `Microsoft.Extensions.AI.Evaluation` libraries, check out the [API Usage Examples](https://github.com/dotnet/ai-samples/blob/main/src/microsoft-extensions-ai-evaluation/api/) available in the [dotnet/ai-samples](https://github.com/dotnet/ai-samples) repo. These examples are structured as a collection of unit tests. Each unit test showcases a specific concept or API, and builds on the concepts and APIs showcased in previous unit tests.
+
+
+## Feedback & Contributing
+
+We welcome feedback and contributions in [our GitHub repo](https://github.com/dotnet/extensions).
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/README.md b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/README.md
index c21e2a299ad..580facd6294 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/README.md
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/README.md
@@ -5,6 +5,8 @@
* [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
* [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Retrieval, Equivalence and Groundedness.
* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Foundry Evaluation service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
+* [`Microsoft.Extensions.AI.Evaluation.NLP`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains a set of evaluators that implement common algorithms for evaluating machine translation and natural
+language processing tasks. Evaluators currently include BLEU score, with more planned.
* [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
* [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
* [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.
@@ -18,6 +20,7 @@ dotnet add package Microsoft.Extensions.AI.Evaluation
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality
dotnet add package Microsoft.Extensions.AI.Evaluation.Safety
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting
+dotnet add package Microsoft.Extensions.AI.Evaluation.NLP
```
Or directly in the C# project file:
@@ -28,6 +31,7 @@ Or directly in the C# project file:
+
```
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/README.md b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/README.md
index c21e2a299ad..580facd6294 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/README.md
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting.Azure/README.md
@@ -5,6 +5,8 @@
* [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
* [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Retrieval, Equivalence and Groundedness.
* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Foundry Evaluation service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
+* [`Microsoft.Extensions.AI.Evaluation.NLP`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains a set of evaluators that implement common algorithms for evaluating machine translation and natural
+language processing tasks. Evaluators currently include BLEU score, with more planned.
* [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
* [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
* [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.
@@ -18,6 +20,7 @@ dotnet add package Microsoft.Extensions.AI.Evaluation
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality
dotnet add package Microsoft.Extensions.AI.Evaluation.Safety
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting
+dotnet add package Microsoft.Extensions.AI.Evaluation.NLP
```
Or directly in the C# project file:
@@ -28,6 +31,7 @@ Or directly in the C# project file:
+
```
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/README.md b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/README.md
index c042da70deb..e135ed24cfe 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/README.md
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation.Safety/README.md
@@ -5,6 +5,8 @@
* [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
* [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Retrieval, Equivalence and Groundedness.
* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Foundry Evaluation service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
+* [`Microsoft.Extensions.AI.Evaluation.NLP`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains a set of evaluators that implement common algorithms for evaluating machine translation and natural
+language processing tasks. Evaluators currently include BLEU score, with more planned.
* [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
* [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
* [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.
@@ -18,6 +20,7 @@ dotnet add package Microsoft.Extensions.AI.Evaluation
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality
dotnet add package Microsoft.Extensions.AI.Evaluation.Safety
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting
+dotnet add package Microsoft.Extensions.AI.Evaluation.NLP
```
Or directly in the C# project file:
@@ -28,6 +31,7 @@ Or directly in the C# project file:
+
```
diff --git a/src/Libraries/Microsoft.Extensions.AI.Evaluation/README.md b/src/Libraries/Microsoft.Extensions.AI.Evaluation/README.md
index c21e2a299ad..580facd6294 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Evaluation/README.md
+++ b/src/Libraries/Microsoft.Extensions.AI.Evaluation/README.md
@@ -5,6 +5,8 @@
* [`Microsoft.Extensions.AI.Evaluation`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) - Defines core abstractions and types for supporting evaluation.
* [`Microsoft.Extensions.AI.Evaluation.Quality`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) - Contains evaluators that can be used to evaluate the quality of AI responses in your projects including Relevance, Truth, Completeness, Fluency, Coherence, Retrieval, Equivalence and Groundedness.
* [`Microsoft.Extensions.AI.Evaluation.Safety`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Safety) - Contains a set of evaluators that are built atop the Azure AI Foundry Evaluation service that can be used to evaluate the content safety of AI responses in your projects including Protected Material, Groundedness Pro, Ungrounded Attributes, Hate and Unfairness, Self Harm, Violence, Sexual, Code Vulnerability and Indirect Attack.
+* [`Microsoft.Extensions.AI.Evaluation.NLP`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.NLP) - Contains a set of evaluators that implement common algorithms for evaluating machine translation and natural
+language processing tasks. Evaluators currently include BLEU score, with more planned.
* [`Microsoft.Extensions.AI.Evaluation.Reporting`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting) - Contains support for caching LLM responses, storing the results of evaluations and generating reports from that data.
* [`Microsoft.Extensions.AI.Evaluation.Reporting.Azure`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Reporting.Azure) - Supports the `Microsoft.Extensions.AI.Evaluation.Reporting` library with an implementation for caching LLM responses and storing the evaluation results in an Azure Storage container.
* [`Microsoft.Extensions.AI.Evaluation.Console`](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Console) - A command line dotnet tool for generating reports and managing evaluation data.
@@ -18,6 +20,7 @@ dotnet add package Microsoft.Extensions.AI.Evaluation
dotnet add package Microsoft.Extensions.AI.Evaluation.Quality
dotnet add package Microsoft.Extensions.AI.Evaluation.Safety
dotnet add package Microsoft.Extensions.AI.Evaluation.Reporting
+dotnet add package Microsoft.Extensions.AI.Evaluation.NLP
```
Or directly in the C# project file:
@@ -28,6 +31,7 @@ Or directly in the C# project file:
+
```
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/BLEUAlgorithmTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/BLEUAlgorithmTests.cs
new file mode 100644
index 00000000000..1b029dc4a37
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/BLEUAlgorithmTests.cs
@@ -0,0 +1,226 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Extensions.AI.Evaluation.NLP.Common;
+using Xunit;
+using static Microsoft.Extensions.AI.Evaluation.NLP.Common.BLEUAlgorithm;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
+
+public class BLEUAlgorithmTests
+{
+ [Fact]
+ public void ModifiedPrecisionTests()
+ {
+        IEnumerable<IEnumerable<string>> references = ["the cat is on the mat".Split(' '), "there is a cat on the mat".Split(' ')];
+        IEnumerable<string> hypothesis = "the the the the the the the".Split(' ');
+ RationalNumber prec = ModifiedPrecision(references, hypothesis, 1);
+ Assert.Equal(0.2857, prec.ToDouble(), 4);
+
+ references = [
+ "It is a guide to action that ensures that the military will forever heed Party commands".Split(' '),
+ "It is the guiding principle which guarantees the military forces always being under the command of the Party".Split(' '),
+ "It is the practical guide for the army always to heed the directions of the party".Split(' '),
+ ];
+ hypothesis = "of the".Split(' ');
+ prec = ModifiedPrecision(references, hypothesis, 1);
+ Assert.Equal(1.0, prec.ToDouble(), 4);
+ prec = ModifiedPrecision(references, hypothesis, 2);
+ Assert.Equal(1.0, prec.ToDouble(), 4);
+
+ references = [
+ "It is a guide to action that ensures that the military will forever heed Party commands".Split(' '),
+ "It is the guiding principle which guarantees the military forces always being under the command of the Party".Split(' '),
+ "It is the practical guide for the army always to heed the directions of the party".Split(' '),
+ ];
+ IEnumerable hypothesis1 = "It is a guide to action which ensures that the military always obeys the commands of the party".Split(' ');
+ IEnumerable hypothesis2 = "It is to insure the troops forever hearing the activity guidebook that party direct".Split(' ');
+ prec = ModifiedPrecision(references, hypothesis1, 1);
+ Assert.Equal(0.9444, prec.ToDouble(), 4);
+ prec = ModifiedPrecision(references, hypothesis2, 1);
+ Assert.Equal(0.5714, prec.ToDouble(), 4);
+ prec = ModifiedPrecision(references, hypothesis1, 2);
+ Assert.Equal(0.5882, prec.ToDouble(), 4);
+ prec = ModifiedPrecision(references, hypothesis2, 2);
+ Assert.Equal(0.07692, prec.ToDouble(), 4);
+ }
+
+ [Theory]
+ [InlineData(new int[] { 0, 1, 0, 2 }, 10, new[] { 0.2303, 0.0576 })]
+ [InlineData(new int[] { 4, 5, 2, 4 }, 10, new[] { 0.8000, 0.5 })]
+ [InlineData(new int[] { 10, 14, 7, 13, 5, 12, 4, 11 }, 20, new[] { 0.7143, 0.5385, 0.4167, 0.3636 })]
+ [InlineData(new int[] { 10, 14, 7, 13, 0, 12, 0, 11 }, 20, new[] { 0.7143, 0.5385, 0.02496, 0.01362 })]
+ public void SmoothingMethod4Tests(int[] num_denom, int hypLen, double[] vals)
+ {
+ Assert.Equal(num_denom.Length, vals.Length * 2);
+
+ RationalNumber[] prec = new RationalNumber[vals.Length];
+ for (int i = 0; i < num_denom.Length - 1; i += 2)
+ {
+ prec[i / 2] = new RationalNumber(num_denom[i], num_denom[i + 1]);
+ }
+
+ double[] smoothed = SmoothingFunction.Method4(prec, hypLen);
+
+ Assert.Equal(vals.Length, smoothed.Length);
+
+ for (int i = 0; i < vals.Length; i++)
+ {
+ Assert.Equal(vals[i], smoothed[i], 4);
+ }
+ }
+
+ [Fact]
+ public void TestBrevityPenalty()
+ {
+ IEnumerable> references = [
+ Enumerable.Repeat("a", 11),
+ Enumerable.Repeat("a", 8),
+ ];
+ IEnumerable hypothesis = Enumerable.Repeat("a", 7);
+ int hypLength = hypothesis.Count();
+ int closestRefLength = ClosestRefLength(references, hypLength);
+ double brevityPenalty = BrevityPenalty(closestRefLength, hypLength);
+ Assert.Equal(0.8669, brevityPenalty, 4);
+
+ references = [
+ Enumerable.Repeat("a", 11),
+ Enumerable.Repeat("a", 8),
+ Enumerable.Repeat("a", 6),
+ Enumerable.Repeat("a", 7),
+ ];
+ hypothesis = Enumerable.Repeat("a", 7);
+ hypLength = hypothesis.Count();
+ closestRefLength = ClosestRefLength(references, hypLength);
+ brevityPenalty = BrevityPenalty(closestRefLength, hypLength);
+ Assert.Equal(1.0, brevityPenalty, 4);
+
+ references = [
+ Enumerable.Repeat("a", 28),
+ Enumerable.Repeat("a", 28),
+ ];
+ hypothesis = Enumerable.Repeat("a", 12);
+ hypLength = hypothesis.Count();
+ closestRefLength = ClosestRefLength(references, hypLength);
+ brevityPenalty = BrevityPenalty(closestRefLength, hypLength);
+ Assert.Equal(0.26359, brevityPenalty, 4);
+
+ references = [
+ Enumerable.Repeat("a", 13),
+ Enumerable.Repeat("a", 2),
+ ];
+ hypothesis = Enumerable.Repeat("a", 12);
+ hypLength = hypothesis.Count();
+ closestRefLength = ClosestRefLength(references, hypLength);
+ brevityPenalty = BrevityPenalty(closestRefLength, hypLength);
+ Assert.Equal(0.9200, brevityPenalty, 4);
+
+ references = [
+ Enumerable.Repeat("a", 13),
+ Enumerable.Repeat("a", 11),
+ ];
+ hypothesis = Enumerable.Repeat("a", 12);
+ hypLength = hypothesis.Count();
+ closestRefLength = ClosestRefLength(references, hypLength);
+ brevityPenalty = BrevityPenalty(closestRefLength, hypLength);
+ Assert.Equal(1.0, brevityPenalty, 4);
+
+ references = [
+ Enumerable.Repeat("a", 11),
+ Enumerable.Repeat("a", 13),
+ ];
+ hypothesis = Enumerable.Repeat("a", 12);
+ hypLength = hypothesis.Count();
+ closestRefLength = ClosestRefLength(references, hypLength);
+ brevityPenalty = BrevityPenalty(closestRefLength, hypLength);
+ Assert.Equal(1.0, brevityPenalty, 4);
+
+ }
+
+ [Fact]
+ public void TestZeroMatches()
+ {
+ IEnumerable> references = ["The candidate has no alignment to any of the references".Split(' '),];
+ IEnumerable hypothesis = "John loves Mary".Split(' ');
+
+ double score = SentenceBLEU(references, hypothesis, EqualWeights(hypothesis.Count()));
+ Assert.Equal(0.0, score, 4);
+ }
+
+ [Fact]
+ public void TestFullMatches()
+ {
+ IEnumerable> references = ["John loves Mary".Split(' '),];
+ IEnumerable hypothesis = "John loves Mary".Split(' ');
+
+ double score = SentenceBLEU(references, hypothesis, EqualWeights(hypothesis.Count()));
+ Assert.Equal(1.0, score, 4);
+ }
+
+ [Fact]
+ public void TestPartialMatchesHypothesisLongerThanReference()
+ {
+ IEnumerable> references = ["John loves Mary".Split(' '),];
+ IEnumerable hypothesis = "John loves Mary who loves Mike".Split(' ');
+
+ double score = SentenceBLEU(references, hypothesis);
+ Assert.Equal(0, score, 4);
+ }
+
+ [Fact]
+ public void TestSentenceBLEUExampleA()
+ {
+ IEnumerable> references = [
+ "It is a guide to action that ensures that the military will forever heed Party commands".Split(' '),
+ "It is the guiding principle which guarantees the military forces always being under the command of the Party".Split(' '),
+ "It is the practical guide for the army always to heed the directions of the party".Split(' ')
+ ];
+ IEnumerable hypothesis = "It is a guide to action which ensures that the military always obeys the commands of the party".Split(' ');
+
+ double score = SentenceBLEU(references, hypothesis);
+ Assert.Equal(0.5046, score, 4);
+
+ }
+
+ [Fact]
+ public void TestSentenceBLEUExampleB()
+ {
+ IEnumerable> references = [
+ "he was interested in world history because he read the book".Split(' '),
+ ];
+ IEnumerable hypothesis = "he read the book because he was interested in world history".Split(' ');
+
+ double score = SentenceBLEU(references, hypothesis);
+ Assert.Equal(0.74009, score, 4);
+ }
+
+ [Fact]
+ public void TestSentenceBLEUExampleAWithWordTokenizer()
+ {
+ IEnumerable> references = [
+ SimpleWordTokenizer.WordTokenize("It is a guide to action that ensures that the military will forever heed Party commands"),
+ SimpleWordTokenizer.WordTokenize("It is the guiding principle which guarantees the military forces always being under the command of the Party"),
+ SimpleWordTokenizer.WordTokenize("It is the practical guide for the army always to heed the directions of the party")
+ ];
+ IEnumerable hypothesis = SimpleWordTokenizer.WordTokenize("It is a guide to action which ensures that the military always obeys the commands of the party");
+
+ double score = SentenceBLEU(references, hypothesis);
+ Assert.Equal(0.5046, score, 4);
+
+ }
+
+ [Fact]
+ public void TestSentenceBLEUExampleBWithWordTokenizer()
+ {
+ IEnumerable> references = [
+ SimpleWordTokenizer.WordTokenize("he was interested in world history because he read the book"),
+ ];
+ IEnumerable hypothesis = SimpleWordTokenizer.WordTokenize("he read the book because he was interested in world history");
+
+ double score = SentenceBLEU(references, hypothesis);
+ Assert.Equal(0.74009, score, 4);
+ }
+}
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/BLEUEvaluatorTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/BLEUEvaluatorTests.cs
new file mode 100644
index 00000000000..48fda1357ab
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/BLEUEvaluatorTests.cs
@@ -0,0 +1,113 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI.Evaluation.NLP;
+using Xunit;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
+
+#pragma warning disable AIEVAL001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+
+public class BLEUEvaluatorTests
+{
+ [Fact]
+ public async Task ReturnsPerfectScoreForIdenticalText()
+ {
+ var evaluator = new BLEUEvaluator();
+ var context = new BLEUEvaluatorContext("The quick brown fox jumps over the lazy dog.");
+ var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, "The quick brown fox jumps over the lazy dog."));
+ var result = await evaluator.EvaluateAsync(response, chatConfiguration: null, [context]);
+ var metric = Assert.Single(result.Metrics.Values) as NumericMetric;
+ Assert.NotNull(metric);
+ Assert.Equal(BLEUEvaluator.BLEUMetricName, metric.Name);
+ Assert.Equal(1.0, (double)metric!.Value!, 4);
+ Assert.NotNull(metric.Interpretation);
+ Assert.Equal(EvaluationRating.Exceptional, metric.Interpretation.Rating);
+ Assert.False(metric.Interpretation.Failed);
+ }
+
+ [Fact]
+ public async Task ReturnsLowScoreForCompletelyDifferentText()
+ {
+ var evaluator = new BLEUEvaluator();
+ var context = new BLEUEvaluatorContext("The quick brown fox jumps over the lazy dog.");
+ var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, "Completely unrelated sentence."));
+ var result = await evaluator.EvaluateAsync(response, chatConfiguration: null, [context]);
+ var metric = Assert.Single(result.Metrics.Values) as NumericMetric;
+ Assert.NotNull(metric);
+ Assert.Equal(BLEUEvaluator.BLEUMetricName, metric.Name);
+ Assert.Equal(0.0136, (double)metric!.Value!, 4);
+ Assert.NotNull(metric.Interpretation);
+ Assert.Equal(EvaluationRating.Unacceptable, metric.Interpretation.Rating);
+ Assert.True(metric.Interpretation.Failed);
+ }
+
+ [Fact]
+ public async Task ReturnsErrorDiagnosticIfNoContext()
+ {
+ var evaluator = new BLEUEvaluator();
+ var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, "Some text."));
+ var result = await evaluator.EvaluateAsync(response, chatConfiguration: null, additionalContext: null);
+ var metric = Assert.Single(result.Metrics.Values) as NumericMetric;
+ Assert.NotNull(metric);
+ Assert.Equal(BLEUEvaluator.BLEUMetricName, metric.Name);
+ Assert.NotNull(metric.Diagnostics);
+ Assert.Contains(metric.Diagnostics, d => d.Severity == EvaluationDiagnosticSeverity.Error);
+ }
+
+ [Theory]
+ [InlineData("the cat is on the mat",
+ "the the the the the the the", 0.0385)]
+ [InlineData("It is a guide to action that ensures that the military will forever heed Party commands",
+ "It is a guide to action which ensures that the military always obeys the commands of the party", 0.4209)]
+ [InlineData("It is the practical guide for the army always to heed the directions of the party",
+ "It is to insure the troops forever hearing the activity guidebook that party direct", 0.0471)]
+ public async Task SampleCases(string reference, string hypothesis, double score)
+ {
+ var evaluator = new BLEUEvaluator();
+ var context = new BLEUEvaluatorContext(reference);
+ var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, hypothesis));
+ var result = await evaluator.EvaluateAsync(response, chatConfiguration: null, [context]);
+ var metric = Assert.Single(result.Metrics.Values) as NumericMetric;
+ Assert.NotNull(metric);
+ Assert.Equal(BLEUEvaluator.BLEUMetricName, metric.Name);
+ Assert.Equal(score, (double)metric!.Value!, 4);
+ }
+
+ [Fact]
+ public async Task MultipleReferences()
+ {
+ string[] references = [
+ "It is a guide to action that ensures that the military will forever heed Party commands",
+ "It is the guiding principle which guarantees the military forces always being under the command of the Party",
+ "It is the practical guide for the army always to heed the directions of the party",
+ ];
+ string hypothesis = "It is a guide to action which ensures that the military always obeys the commands of the party";
+
+ var evaluator = new BLEUEvaluator();
+ var context = new BLEUEvaluatorContext(references);
+ var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, hypothesis));
+ var result = await evaluator.EvaluateAsync(response, chatConfiguration: null, [context]);
+ var metric = Assert.Single(result.Metrics.Values) as NumericMetric;
+ Assert.NotNull(metric);
+ Assert.Equal(BLEUEvaluator.BLEUMetricName, metric.Name);
+ Assert.Equal(0.5046, (double)metric!.Value!, 4);
+ }
+
+ [Fact]
+ public async Task ReturnsErrorDiagnosticIfEmptyResponse()
+ {
+ var evaluator = new BLEUEvaluator();
+ var context = new BLEUEvaluatorContext("Reference text.");
+ var response = new ChatResponse(new ChatMessage(ChatRole.Assistant, ""));
+ var result = await evaluator.EvaluateAsync(response, chatConfiguration: null, [context]);
+ var metric = Assert.Single(result.Metrics.Values) as NumericMetric;
+ Assert.NotNull(metric);
+ Assert.Equal(BLEUEvaluator.BLEUMetricName, metric.Name);
+ Assert.NotNull(metric.Diagnostics);
+ Assert.Contains(metric.Diagnostics, d => d.Severity == EvaluationDiagnosticSeverity.Error);
+ }
+
+}
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/MatchCounterTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/MatchCounterTests.cs
new file mode 100644
index 00000000000..9c2a5b68900
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/MatchCounterTests.cs
@@ -0,0 +1,65 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Linq;
+using Microsoft.Extensions.AI.Evaluation.NLP.Common;
+using Xunit;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
+
+public class MatchCounterTests
+{
+ [Fact]
+ public void EmptyConstructor_InitializesEmptyCounter()
+ {
+ var counter = new MatchCounter();
+ Assert.Empty(counter);
+ Assert.Equal(0, counter.Sum());
+ }
+
+ [Fact]
+ public void ConstructorWithItems_CountsCorrectly()
+ {
+ var counter = new MatchCounter(new[] { "a", "b", "a", "c", "b", "a" });
+ var dict = counter.ToDictionary(kv => kv.Key, kv => kv.Value);
+ Assert.Equal(3, dict["a"]);
+ Assert.Equal(2, dict["b"]);
+ Assert.Equal(1, dict["c"]);
+ Assert.Equal(6, counter.Sum());
+ }
+
+ [Fact]
+ public void Add_AddsSingleItemCorrectly()
+ {
+ var counter = new MatchCounter();
+ counter.Add(5);
+ counter.Add(5);
+ counter.Add(3);
+ var dict = counter.ToDictionary(kv => kv.Key, kv => kv.Value);
+ Assert.Equal(2, dict[5]);
+ Assert.Equal(1, dict[3]);
+ Assert.Equal(3, counter.Sum());
+ }
+
+ [Fact]
+ public void AddRange_AddsMultipleItemsCorrectly()
+ {
+ var counter = new MatchCounter();
+ counter.AddRange("hello");
+ var dict = counter.ToDictionary(kv => kv.Key, kv => kv.Value);
+ Assert.Equal(1, dict['h']);
+ Assert.Equal(1, dict['e']);
+ Assert.Equal(2, dict['l']);
+ Assert.Equal(1, dict['o']);
+ Assert.Equal(5, counter.Sum());
+ }
+
+ [Fact]
+ public void ToDebugString_FormatsCorrectly()
+ {
+ var counter = new MatchCounter(new[] { "x", "y", "x" });
+ var str = counter.ToDebugString();
+ Assert.Contains("x: 2", str);
+ Assert.Contains("y: 1", str);
+ }
+}
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/Microsoft.Extensions.AI.Evaluation.NLP.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/Microsoft.Extensions.AI.Evaluation.NLP.Tests.csproj
new file mode 100644
index 00000000000..6b485136520
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/Microsoft.Extensions.AI.Evaluation.NLP.Tests.csproj
@@ -0,0 +1,13 @@
+
+
+
+    <RootNamespace>Microsoft.Extensions.AI.Evaluation.NLP.Tests</RootNamespace>
+    <Description>Unit tests for Microsoft.Extensions.AI.Evaluation.NLP.</Description>
+
+
+
+
+
+
+
+
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/NGramTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/NGramTests.cs
new file mode 100644
index 00000000000..d782c3c8f88
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/NGramTests.cs
@@ -0,0 +1,80 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Extensions.AI.Evaluation.NLP.Common;
+using Xunit;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
+
+// Tests for NGram<T> (an immutable, value-equatable sequence of n items) and the
+// CreateNGrams extension that slides a window over an input sequence.
+// NOTE(review): the generic type arguments below were restored by hand — the patch
+// text had every "<...>" span stripped (HTML-tag mangling); the remnants at
+// "IEnumerable> result" / "List> expected" make the stripping unambiguous.
+public class NGramTests
+{
+    [Fact]
+    public void Constructor_ValuesAndLength()
+    {
+        var ngram = new NGram<int>(1, 2, 3);
+        Assert.Equal(new[] { 1, 2, 3 }, ngram.Values);
+        Assert.Equal(3, ngram.Length);
+    }
+
+    [Fact]
+    public void Constructor_ThrowsOnEmpty()
+    {
+        // NOTE(review): exception type restored by hand (stripped from the patch
+        // text) — confirm against the NGram<T> constructor's empty-input guard.
+        Assert.Throws<ArgumentException>(() => new NGram<int>(Array.Empty<int>()));
+    }
+
+    [Fact]
+    public void Equals_And_HashCode_WorkCorrectly()
+    {
+        var a = new NGram<int>(1, 2, 3);
+        var b = new NGram<int>(1, 2, 3);
+        var c = new NGram<int>(3, 2, 1);
+
+        // Equality is element-wise and order-sensitive.
+        Assert.True(a.Equals(b));
+        Assert.True(a.Equals((object)b));
+        Assert.False(a.Equals(c));
+        Assert.NotEqual(a.GetHashCode(), c.GetHashCode());
+    }
+
+    [Fact]
+    public void Enumerator_And_IEnumerable()
+    {
+        var ngram = new NGram<char>('a', 'b', 'c');
+        var list = ngram.ToList();
+        Assert.Equal(new[] { 'a', 'b', 'c' }, list);
+    }
+
+    [Fact]
+    public void ToDebugString_FormatsCorrectly()
+    {
+        var ngram = new NGram<string>("x", "y");
+        Assert.Equal("[x,y]", ngram.ToDebugString());
+    }
+
+    [Fact]
+    public void NGramBuilder_Create_Works()
+    {
+        // Collection-expression syntax routes through the CollectionBuilder factory.
+        NGram<int> ngram = [1, 2];
+        Assert.Equal(new NGram<int>(1, 2), ngram);
+    }
+
+    [Fact]
+    public void NGramGenerationNoPadding()
+    {
+        int[] input = [1, 2, 3, 4, 5];
+
+        // Window size 1: each element becomes its own unigram.
+        IEnumerable<NGram<int>> result = input.CreateNGrams(1);
+        List<NGram<int>> expected = [[1], [2], [3], [4], [5]];
+        Assert.True(result.SequenceEqual(expected));
+
+        // Window size 2: overlapping bigrams, no padding at the edges.
+        result = input.CreateNGrams(2);
+        expected = [[1, 2], [2, 3], [3, 4], [4, 5]];
+        Assert.True(result.SequenceEqual(expected));
+
+        result = input.CreateNGrams(3);
+        expected = [[1, 2, 3], [2, 3, 4], [3, 4, 5]];
+        Assert.True(result.SequenceEqual(expected));
+    }
+}
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/RationalNumberTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/RationalNumberTests.cs
new file mode 100644
index 00000000000..8776b97811f
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/RationalNumberTests.cs
@@ -0,0 +1,55 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using Microsoft.Extensions.AI.Evaluation.NLP.Common;
+using Xunit;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
+
+// Tests for RationalNumber (an exact numerator/denominator pair with value equality).
+public class RationalNumberTests
+{
+    [Fact]
+    public void Constructor_StoresNumeratorAndDenominator()
+    {
+        var r = new RationalNumber(3, 4);
+        Assert.Equal(3, r.Numerator);
+        Assert.Equal(4, r.Denominator);
+    }
+
+    [Fact]
+    public void Constructor_ThrowsOnZeroDenominator()
+    {
+        // NOTE(review): exception type restored by hand — the patch text had the
+        // "<...>" type argument stripped. Confirm against the constructor's guard;
+        // it may be ArgumentOutOfRangeException (xUnit's Throws requires the exact type).
+        Assert.Throws<ArgumentException>(() => new RationalNumber(1, 0));
+    }
+
+    [Theory]
+    [InlineData(1, 2, 0.5)]
+    [InlineData(-3, 4, -0.75)]
+    [InlineData(0, 5, 0.0)]
+    public void ToDouble_ReturnsExpected(int num, int denom, double expected)
+    {
+        var r = new RationalNumber(num, denom);
+
+        // Compare to 6 decimal places to avoid binary floating-point noise.
+        Assert.Equal(expected, r.ToDouble(), 6);
+    }
+
+    [Fact]
+    public void ToDebugString_FormatsCorrectly()
+    {
+        var r = new RationalNumber(7, 9);
+        Assert.Equal("7/9", r.ToDebugString());
+    }
+
+    [Fact]
+    public void Equals_And_HashCode_WorkCorrectly()
+    {
+        var a = new RationalNumber(2, 3);
+        var b = new RationalNumber(2, 3);
+        var c = new RationalNumber(3, 2);
+
+        // Equal values must hash equally; distinct values are expected to differ here.
+        Assert.True(a.Equals(b));
+        Assert.True(a.Equals((object)b));
+        Assert.False(a.Equals(c));
+        Assert.Equal(a.GetHashCode(), b.GetHashCode());
+        Assert.NotEqual(a.GetHashCode(), c.GetHashCode());
+    }
+}
diff --git a/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/SimpleTokenizerTests.cs b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/SimpleTokenizerTests.cs
new file mode 100644
index 00000000000..3451a6c38c9
--- /dev/null
+++ b/test/Libraries/Microsoft.Extensions.AI.Evaluation.NLP.Tests/SimpleTokenizerTests.cs
@@ -0,0 +1,72 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Microsoft.Extensions.AI.Evaluation.NLP.Common;
+using Xunit;
+
+namespace Microsoft.Extensions.AI.Evaluation.NLP.Tests;
+
+#pragma warning disable AIEVAL001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+
+// Tests for SimpleWordTokenizer.WordTokenize: upper-cases word tokens, splits out
+// punctuation as separate tokens, joins hyphen+newline continuations, and decodes
+// HTML entities before tokenizing.
+// NOTE(review): the two entity-bearing input literals below were restored by hand —
+// the patch text had the entities (&quot; &amp; &lt; &gt; &#39;) decoded to raw
+// characters, which is not even a valid C# string literal. Verify the exact entity
+// spellings against the original test file.
+public class SimpleTokenizerTests
+{
+    [Theory]
+    [InlineData("  $41.23  ", new[] { "$", "41.23" })]
+    [InlineData("word", new[] { "WORD" })]
+    [InlineData("word1 word2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1,word2", new[] { "WORD1", ",", "WORD2" })]
+    [InlineData("word1.word2", new[] { "WORD1", ".", "WORD2" })]
+    [InlineData("word1!word2?", new[] { "WORD1", "!", "WORD2", "?" })]
+    [InlineData("word1-word2", new[] { "WORD1", "-", "WORD2" })]
+    [InlineData("word1 - word2", new[] { "WORD1", "-", "WORD2" })]
+    [InlineData("word1-\n word2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1-\r\n word2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1-\r\nword2", new[] { "WORD1WORD2" })]
+    [InlineData("word1-\nword2", new[] { "WORD1WORD2" })]
+    [InlineData("word1\nword2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1 \n word2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1\r\nword2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1 \r\n word2", new[] { "WORD1", "WORD2" })]
+    [InlineData("word1\tword2", new[] { "WORD1", "WORD2" })]
+    [InlineData("It is a guide to action that ensures that the military will forever heed Party commands.",
+        new[] { "IT", "IS", "A", "GUIDE", "TO", "ACTION", "THAT", "ENSURES", "THAT", "THE", "MILITARY", "WILL", "FOREVER", "HEED", "PARTY", "COMMANDS", "." })]
+    [InlineData("Good muffins cost $3.88 (roughly 3,36 euros)\nin New York.  Please buy me\ntwo of them.\nThanks.",
+        new[] { "GOOD", "MUFFINS", "COST", "$", "3.88", "(", "ROUGHLY", "3,36", "EUROS", ")", "IN", "NEW", "YORK", ".", "PLEASE", "BUY", "ME", "TWO", "OF", "THEM", ".", "THANKS", "." })]
+    [InlineData("", new string[0])]
+    [InlineData("   This is a test.  ", new[] { "THIS", "IS", "A", "TEST", "." })]
+    [InlineData("Hello, world! How's it going?", new[] { "HELLO", ",", "WORLD", "!", "HOW", "'", "S", "IT", "GOING", "?" })]
+    [InlineData("&quot;Quotes&quot; and &amp; symbols &lt; &gt; &#39;", new[] { "\"", "QUOTES", "\"", "AND", "&", "SYMBOLS", "<", ">", "'" })]
+    [InlineData("-\nThis is a test.", new[] { "THIS", "IS", "A", "TEST", "." })]
+    public void Tokenize_Cases(string input, string[] expected)
+    {
+        var result = SimpleWordTokenizer.WordTokenize(input);
+        Assert.Equal(expected, result);
+    }
+
+    [Fact]
+    public void HandlesMultipleSpacesAndEmptyEntries()
+    {
+        // Runs of whitespace collapse: no empty tokens are produced.
+        var input = "  word1   word2  word3   ";
+        var expected = new[] { "WORD1", "WORD2", "WORD3" };
+        var result = SimpleWordTokenizer.WordTokenize(input);
+        Assert.Equal(expected, result);
+    }
+
+    [Fact]
+    public void HandlesUnicodeSymbolsAndPunctuation()
+    {
+        // Non-ASCII symbols come through as standalone tokens.
+        var input = "word1 © word2 ™ word3 — word4";
+        var expected = new[] { "WORD1", "©", "WORD2", "™", "WORD3", "—", "WORD4" };
+        var result = SimpleWordTokenizer.WordTokenize(input);
+        Assert.Equal(expected, result);
+    }
+
+    [Fact]
+    public void HandlesHtmlEntities()
+    {
+        // Entities are decoded first, then tokenized like ordinary punctuation/words.
+        var input = "&quot;Hello&quot; &amp; Goodbye &lt;test&gt; &#39;";
+        var expected = new[] { "\"", "HELLO", "\"", "&", "GOODBYE", "<", "TEST", ">", "'" };
+        var result = SimpleWordTokenizer.WordTokenize(input);
+        Assert.Equal(expected, result);
+    }
+}