Skip to content

Commit c2f0174

Browse files
committed
Merged PR 48268: Address M.E.VectorData feedback for IEmbeddingGenerator (#6058)
Address M.E.VectorData feedback for IEmbeddingGenerator (#6058)
* Move GetService down to a non-generic IEmbeddingGenerator interface
* Separate UriContent from DataContent
2 parents 6883e99 + 5ebe949 commit c2f0174

27 files changed

+529
-305
lines changed

src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/AIContent.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ namespace Microsoft.Extensions.AI;
1111
[JsonDerivedType(typeof(FunctionCallContent), typeDiscriminator: "functionCall")]
1212
[JsonDerivedType(typeof(FunctionResultContent), typeDiscriminator: "functionResult")]
1313
[JsonDerivedType(typeof(TextContent), typeDiscriminator: "text")]
14+
[JsonDerivedType(typeof(UriContent), typeDiscriminator: "uri")]
1415
[JsonDerivedType(typeof(UsageContent), typeDiscriminator: "usage")]
1516
public class AIContent
1617
{

src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataContent.cs

Lines changed: 69 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,17 @@
88
using Microsoft.Shared.Diagnostics;
99

1010
#pragma warning disable S3996 // URI properties should not be strings
11+
#pragma warning disable CA1054 // URI-like parameters should not be strings
1112
#pragma warning disable CA1056 // URI-like properties should not be strings
1213

1314
namespace Microsoft.Extensions.AI;
1415

1516
/// <summary>
16-
/// Represents data content, such as an image or audio.
17+
/// Represents binary content with an associated media type (also known as MIME type).
1718
/// </summary>
1819
/// <remarks>
1920
/// <para>
20-
/// The represented content may either be the actual bytes stored in this instance, or it may
21-
/// be a URI that references the location of the content.
21+
/// The content represents in-memory data. For references to data at a remote URI, use <see cref="UriContent"/> instead.
2222
/// </para>
2323
/// <para>
2424
/// <see cref="Uri"/> always returns a valid URI string, even if the instance was constructed from
@@ -32,20 +32,27 @@ public class DataContent : AIContent
3232
// Ideally DataContent would be based in terms of Uri. However, Uri has a length limitation that makes it prohibitive
3333
// for the kinds of data URIs necessary to support here. As such, this type is based in strings.
3434

35+
/// <summary>Parsed data URI information.</summary>
36+
private readonly DataUriParser.DataUri? _dataUri;
37+
3538
/// <summary>The string-based representation of the URI, including any data in the instance.</summary>
3639
private string? _uri;
3740

3841
/// <summary>The data, lazily initialized if the data is provided in a data URI.</summary>
3942
private ReadOnlyMemory<byte>? _data;
4043

41-
/// <summary>Parsed data URI information.</summary>
42-
private DataUriParser.DataUri? _dataUri;
43-
4444
/// <summary>
4545
/// Initializes a new instance of the <see cref="DataContent"/> class.
4646
/// </summary>
47-
/// <param name="uri">The URI of the content. This can be a data URI.</param>
48-
/// <param name="mediaType">The media type (also known as MIME type) represented by the content.</param>
47+
/// <param name="uri">The data URI containing the content.</param>
48+
/// <param name="mediaType">
49+
/// The media type (also known as MIME type) represented by the content. If not provided,
50+
/// it must be provided as part of the <paramref name="uri"/>.
51+
/// </param>
52+
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is <see langword="null"/>.</exception>
53+
/// <exception cref="ArgumentException"><paramref name="uri"/> is not a data URI.</exception>
54+
/// <exception cref="ArgumentException"><paramref name="uri"/> did not contain a media type and <paramref name="mediaType"/> was not supplied.</exception>
55+
/// <exception cref="ArgumentException"><paramref name="mediaType"/> is an invalid media type.</exception>
4956
public DataContent(Uri uri, string? mediaType = null)
5057
: this(Throw.IfNull(uri).ToString(), mediaType)
5158
{
@@ -54,75 +61,78 @@ public DataContent(Uri uri, string? mediaType = null)
5461
/// <summary>
5562
/// Initializes a new instance of the <see cref="DataContent"/> class.
5663
/// </summary>
57-
/// <param name="uri">The URI of the content. This can be a data URI.</param>
64+
/// <param name="uri">The data URI containing the content.</param>
5865
/// <param name="mediaType">The media type (also known as MIME type) represented by the content.</param>
66+
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is <see langword="null"/>.</exception>
67+
/// <exception cref="ArgumentException"><paramref name="uri"/> is not a data URI.</exception>
68+
/// <exception cref="ArgumentException"><paramref name="uri"/> did not contain a media type and <paramref name="mediaType"/> was not supplied.</exception>
69+
/// <exception cref="ArgumentException"><paramref name="mediaType"/> is an invalid media type.</exception>
5970
[JsonConstructor]
6071
public DataContent([StringSyntax(StringSyntaxAttribute.Uri)] string uri, string? mediaType = null)
6172
{
6273
_uri = Throw.IfNullOrWhitespace(uri);
6374

64-
ValidateMediaType(ref mediaType);
65-
MediaType = mediaType;
66-
67-
if (uri.StartsWith(DataUriParser.Scheme, StringComparison.OrdinalIgnoreCase))
75+
if (!uri.StartsWith(DataUriParser.Scheme, StringComparison.OrdinalIgnoreCase))
6876
{
69-
_dataUri = DataUriParser.Parse(uri.AsMemory());
77+
Throw.ArgumentException(nameof(uri), "The provided URI is not a data URI.");
78+
}
7079

71-
// If the data URI contains a media type that's different from a non-null media type
72-
// explicitly provided, prefer the one explicitly provided as an override.
73-
if (MediaType is not null)
74-
{
75-
if (MediaType != _dataUri.MediaType)
76-
{
77-
// Extract the bytes from the data URI and null out the uri.
78-
// Then we'll lazily recreate it later if needed based on the updated media type.
79-
_data = _dataUri.ToByteArray();
80-
_dataUri = null;
81-
_uri = null;
82-
}
83-
}
84-
else
80+
_dataUri = DataUriParser.Parse(uri.AsMemory());
81+
82+
if (mediaType is null)
83+
{
84+
mediaType = _dataUri.MediaType;
85+
if (mediaType is null)
8586
{
86-
MediaType = _dataUri.MediaType;
87+
Throw.ArgumentNullException(nameof(mediaType), $"{nameof(uri)} did not contain a media type, and {nameof(mediaType)} was not provided.");
8788
}
8889
}
89-
else if (!System.Uri.TryCreate(uri, UriKind.Absolute, out _))
90+
else
9091
{
91-
throw new UriFormatException("The URI is not well-formed.");
92+
if (mediaType != _dataUri.MediaType)
93+
{
94+
// If the data URI contains a media type that's different from a non-null media type
95+
// explicitly provided, prefer the one explicitly provided as an override.
96+
97+
// Extract the bytes from the data URI and null out the uri.
98+
// Then we'll lazily recreate it later if needed based on the updated media type.
99+
_data = _dataUri.ToByteArray();
100+
_dataUri = null;
101+
_uri = null;
102+
}
92103
}
104+
105+
MediaType = DataUriParser.ThrowIfInvalidMediaType(mediaType);
93106
}
94107

95108
/// <summary>
96109
/// Initializes a new instance of the <see cref="DataContent"/> class.
97110
/// </summary>
98111
/// <param name="data">The byte contents.</param>
99112
/// <param name="mediaType">The media type (also known as MIME type) represented by the content.</param>
100-
public DataContent(ReadOnlyMemory<byte> data, string? mediaType = null)
113+
/// <exception cref="ArgumentNullException"><paramref name="mediaType"/> is <see langword="null"/>.</exception>
114+
/// <exception cref="ArgumentException"><paramref name="mediaType"/> is empty or composed entirely of whitespace.</exception>
115+
public DataContent(ReadOnlyMemory<byte> data, string mediaType)
101116
{
102-
ValidateMediaType(ref mediaType);
103-
MediaType = mediaType;
117+
MediaType = DataUriParser.ThrowIfInvalidMediaType(mediaType);
104118

105119
_data = data;
106120
}
107121

108122
/// <summary>
109-
/// Determines whether the <see cref="MediaType"/> has the specified prefix.
123+
/// Determines whether the <see cref="MediaType"/>'s top-level type matches the specified <paramref name="topLevelType"/>.
110124
/// </summary>
111-
/// <param name="prefix">The media type prefix.</param>
112-
/// <returns><see langword="true"/> if the <see cref="MediaType"/> has the specified prefix, otherwise <see langword="false"/>.</returns>
113-
public bool MediaTypeStartsWith(string prefix)
114-
=> MediaType?.StartsWith(prefix, StringComparison.OrdinalIgnoreCase) is true;
115-
116-
/// <summary>Sets <paramref name="mediaType"/> to null if it's empty or composed entirely of whitespace.</summary>
117-
private static void ValidateMediaType(ref string? mediaType)
118-
{
119-
if (!DataUriParser.IsValidMediaType(mediaType.AsSpan(), ref mediaType))
120-
{
121-
Throw.ArgumentException(nameof(mediaType), "Invalid media type.");
122-
}
123-
}
125+
/// <param name="topLevelType">The type to compare against <see cref="MediaType"/>.</param>
126+
/// <returns><see langword="true"/> if the type portion of <see cref="MediaType"/> matches the specified value; otherwise, <see langword="false"/>.</returns>
127+
/// <remarks>
128+
/// A media type is primarily composed of two parts, a "type" and a "subtype", separated by a slash ("/").
129+
/// The type portion is also referred to as the "top-level type"; for example,
130+
/// "image/png" has a top-level type of "image". <see cref="HasTopLevelMediaType"/> compares
131+
/// the specified <paramref name="topLevelType"/> against the type portion of <see cref="MediaType"/>.
132+
/// </remarks>
133+
public bool HasTopLevelMediaType(string topLevelType) => DataUriParser.HasTopLevelMediaType(MediaType, topLevelType);
124134

125-
/// <summary>Gets the URI for this <see cref="DataContent"/>.</summary>
135+
/// <summary>Gets the data URI for this <see cref="DataContent"/>.</summary>
126136
/// <remarks>
127137
/// The returned URI is always a valid URI string, even if the instance was constructed from a <see cref="ReadOnlyMemory{Byte}"/>
128138
/// or from a <see cref="System.Uri"/>. In the case of a <see cref="ReadOnlyMemory{T}"/>, this property returns a data URI containing
@@ -137,8 +147,8 @@ public string Uri
137147
{
138148
if (_dataUri is null)
139149
{
140-
Debug.Assert(Data is not null, "Expected Data to be initialized.");
141-
_uri = string.Concat("data:", MediaType, ";base64,", Convert.ToBase64String(Data.GetValueOrDefault()
150+
Debug.Assert(_data is not null, "Expected _data to be initialized.");
151+
_uri = string.Concat("data:", MediaType, ";base64,", Convert.ToBase64String(_data.GetValueOrDefault()
142152
#if NET
143153
.Span));
144154
#else
@@ -167,10 +177,9 @@ public string Uri
167177
/// If the media type was explicitly specified, this property returns that value.
168178
/// If the media type was not explicitly specified, but a data URI was supplied and that data URI contained a non-default
169179
/// media type, that media type is returned.
170-
/// Otherwise, this property returns null.
171180
/// </remarks>
172-
[JsonPropertyOrder(1)]
173-
public string? MediaType { get; private set; }
181+
[JsonIgnore]
182+
public string MediaType { get; }
174183

175184
/// <summary>Gets the data represented by this instance.</summary>
176185
/// <remarks>
@@ -181,16 +190,18 @@ public string Uri
181190
/// no attempt is made to retrieve the data from that URI.
182191
/// </remarks>
183192
[JsonIgnore]
184-
public ReadOnlyMemory<byte>? Data
193+
public ReadOnlyMemory<byte> Data
185194
{
186195
get
187196
{
188-
if (_dataUri is not null)
197+
if (_data is null)
189198
{
190-
_data ??= _dataUri.ToByteArray();
199+
Debug.Assert(_dataUri is not null, "Expected dataUri to be initialized.");
200+
_data = _dataUri!.ToByteArray();
191201
}
192202

193-
return _data;
203+
Debug.Assert(_data is not null, "Expected data to be initialized.");
204+
return _data.GetValueOrDefault();
194205
}
195206
}
196207

src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55
#if NET8_0_OR_GREATER
66
using System.Buffers.Text;
77
#endif
8-
using System.Diagnostics;
8+
using System.Diagnostics.CodeAnalysis;
99
using System.Net;
1010
using System.Net.Http.Headers;
11+
using System.Runtime.CompilerServices;
1112
using System.Text;
13+
using Microsoft.Shared.Diagnostics;
14+
15+
#pragma warning disable CA1307 // Specify StringComparison for clarity
1216

1317
namespace Microsoft.Extensions.AI;
1418

@@ -55,29 +59,35 @@ public static DataUri Parse(ReadOnlyMemory<char> dataUri)
5559
}
5660

5761
// Validate the media type, if present.
62+
ReadOnlySpan<char> span = metadata.Span.Trim();
5863
string? mediaType = null;
59-
if (!IsValidMediaType(metadata.Span.Trim(), ref mediaType))
64+
if (!span.IsEmpty && !IsValidMediaType(span, ref mediaType))
6065
{
6166
throw new UriFormatException("Invalid data URI format: the media type is not a valid.");
6267
}
6368

6469
return new DataUri(data, isBase64, mediaType);
6570
}
6671

67-
/// <summary>Validates that a media type is valid, and if successful, ensures we have it as a string.</summary>
68-
public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, ref string? mediaType)
72+
public static string ThrowIfInvalidMediaType(
73+
string mediaType, [CallerArgumentExpression(nameof(mediaType))] string parameterName = "")
6974
{
70-
Debug.Assert(
71-
mediaType is null || mediaTypeSpan.Equals(mediaType.AsSpan(), StringComparison.Ordinal),
72-
"mediaType string should either be null or the same as the span");
75+
_ = Throw.IfNullOrWhitespace(mediaType, parameterName);
7376

74-
// If the media type is empty or all whitespace, normalize it to null.
75-
if (mediaTypeSpan.IsWhiteSpace())
77+
if (!IsValidMediaType(mediaType))
7678
{
77-
mediaType = null;
78-
return true;
79+
Throw.ArgumentException(parameterName, $"An invalid media type was specified: '{mediaType}'");
7980
}
8081

82+
return mediaType;
83+
}
84+
85+
public static bool IsValidMediaType(string mediaType) =>
86+
IsValidMediaType(mediaType.AsSpan(), ref mediaType);
87+
88+
/// <summary>Validates that a media type is valid, and if successful, ensures we have it as a string.</summary>
89+
public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, [NotNull] ref string? mediaType)
90+
{
8191
// For common media types, we can avoid both allocating a string for the span and avoid parsing overheads.
8292
string? knownType = mediaTypeSpan switch
8393
{
@@ -108,7 +118,7 @@ public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, ref string
108118
};
109119
if (knownType is not null)
110120
{
111-
mediaType ??= knownType;
121+
mediaType = knownType;
112122
return true;
113123
}
114124

@@ -117,6 +127,16 @@ public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, ref string
117127
return MediaTypeHeaderValue.TryParse(mediaType, out _);
118128
}
119129

130+
public static bool HasTopLevelMediaType(string mediaType, string topLevelMediaType)
131+
{
132+
int slashIndex = mediaType.IndexOf('/');
133+
134+
ReadOnlySpan<char> span = slashIndex < 0 ? mediaType.AsSpan() : mediaType.AsSpan(0, slashIndex);
135+
span = span.Trim();
136+
137+
return span.Equals(topLevelMediaType.AsSpan(), StringComparison.OrdinalIgnoreCase);
138+
}
139+
120140
/// <summary>Test whether the value is a base64 string without whitespace.</summary>
121141
private static bool IsValidBase64Data(ReadOnlySpan<char> value)
122142
{

0 commit comments

Comments
 (0)