Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Elzik.Breef.Api.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Sample requests for exercising the Breef API from an .http-aware editor.
# Each request body is separated from its headers by a blank line; without that
# separator the opening brace of the JSON body is read as part of the header section.
# NOTE(review): the host address below looks like a local development endpoint - adjust to your launch profile.
@Elzik.Breef.Api_HostAddress = http://localhost:5079

### HTML

# Requests a breef for a regular web page URL.
POST {{Elzik.Breef.Api_HostAddress}}/breefs
Content-Type: application/json
BREEF-API-KEY: test-key

{
  "url":"https://www.positive.news/society/swiping-less-living-more-how-to-take-control-of-our-digital-lives/"
}

### Reddit Post

# Requests a breef for a single Reddit post URL.
POST {{Elzik.Breef.Api_HostAddress}}/breefs
Content-Type: application/json
BREEF-API-KEY: test-key

{
  "url":"https://www.reddit.com/r/selfhosted/comments/1ojndg6/advice_should_i_buy_a_new_router_or_build_one/"
}

### Subreddit

# Requests a breef for a subreddit URL.
POST {{Elzik.Breef.Api_HostAddress}}/breefs
Content-Type: application/json
BREEF-API-KEY: test-key

{
  "url":"https://www.reddit.com/r/dotnet/"
}

### Health

# Calls the health endpoint; no API key header is sent with this request.
GET {{Elzik.Breef.Api_HostAddress}}/health
Content-Type: application/json
6 changes: 6 additions & 0 deletions Elzik.Breef.sln
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elzik.Breef.Infrastructure.
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elzik.Breef.Api.Tests.Integration", "tests\Elzik.Breef.Api.Tests.Integration\Elzik.Breef.Api.Tests.Integration.csproj", "{F1FD6E10-FB87-4BBD-B853-05889A04CFA9}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SampleRequests", "SampleRequests", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}"
ProjectSection(SolutionItems) = preProject
Elzik.Breef.Api.http = Elzik.Breef.Api.http
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -88,6 +93,7 @@ Global
{7F0140C3-DDE1-4248-9FCC-980699F2D05A} = {C0C3A88F-44EC-4383-96FF-3BE016F2122B}
{6CACB8E5-4B4C-4C05-AD80-32561037DAF0} = {C0C3A88F-44EC-4383-96FF-3BE016F2122B}
{F1FD6E10-FB87-4BBD-B853-05889A04CFA9} = {C0C3A88F-44EC-4383-96FF-3BE016F2122B}
{02EA681E-C7D8-13C7-8484-4AC65E1B71E8} = {C0C3A88F-44EC-4383-96FF-3BE016F2122B}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {6E3F0693-A48D-48DC-935C-2D8758390BC2}
Expand Down
16 changes: 0 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,22 +120,6 @@ Example:
}
```

#### AI Content Summariser

The AI model uses these settings when generating summaries. The model may not adhere to them strictly, but they will influence the resulting summary.

- **TargetSummaryLengthPercentage** - Sets the size of the summary with respect to the size of the original article. The default used if not set is 10%.
- **TargetSummaryMaxWordCount** - Sets the maximum number of words for the summary generated. The default used if not set is 200 words.

Example:

```jsonc
"AiContentSummariser": {
"TargetSummaryLengthPercentage": 10, // breef_AiContentSummariser__TargetSummaryLengthPercentage
"TargetSummaryMaxWordCount": 200 // breef_AiContentSummariser__TargetSummaryMaxWordCount
}
```

#### Web Page Downloader

These settings affect how pages are downloaded prior to being summarised.
Expand Down
15 changes: 0 additions & 15 deletions src/Elzik.Breef.Api/Elzik.Breef.Api.http

This file was deleted.

13 changes: 9 additions & 4 deletions src/Elzik.Breef.Api/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
{
options.AddDefaultPolicy(builder =>
{
builder.AllowAnyOrigin()

Check warning on line 52 in src/Elzik.Breef.Api/Program.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Make sure this permissive CORS policy is safe here. (https://rules.sonarsource.com/csharp/RSPEC-5122)

Check warning on line 52 in src/Elzik.Breef.Api/Program.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Make sure this permissive CORS policy is safe here. (https://rules.sonarsource.com/csharp/RSPEC-5122)

Check warning on line 52 in src/Elzik.Breef.Api/Program.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Make sure this permissive CORS policy is safe here. (https://rules.sonarsource.com/csharp/RSPEC-5122)

Check warning on line 52 in src/Elzik.Breef.Api/Program.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Make sure this permissive CORS policy is safe here. (https://rules.sonarsource.com/csharp/RSPEC-5122)
.AllowAnyMethod()
.AllowAnyHeader();
});
Expand Down Expand Up @@ -117,12 +117,17 @@
.Bind(configuration.GetSection("AiService"))
.ValidateDataAnnotations()
.ValidateOnStart();
builder.Services.AddOptions<AiContentSummariserOptions>()
.Bind(configuration.GetSection("AiContentSummariser"))
.ValidateDataAnnotations()
.ValidateOnStart();
builder.Services.AddAiContentSummariser();

builder.Services.AddSingleton<IContentSummarisationInstructionProvider>(sp =>
{
var logger = sp.GetRequiredService<ILogger<FileBasedContentSummarisationInstructionProvider>>();
return new FileBasedContentSummarisationInstructionProvider(
logger,
Path.Combine(AppContext.BaseDirectory, "SummarisationInstructions"),
["HtmlContent", "RedditPostContent", "SubredditContent"]);
});

builder.Services.AddOptions<WallabagOptions>()
.Bind(configuration.GetSection("Wallabag"))
.ValidateDataAnnotations()
Expand Down
12 changes: 8 additions & 4 deletions src/Elzik.Breef.Application/BreefGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@

namespace Elzik.Breef.Application
{
public class BreefGenerator(
public class BreefGenerator(
IContentExtractor contentExtractor,
IContentSummariser contentSummariser,
IContentSummariser contentSummariser,
IContentSummarisationInstructionProvider instructionProvider,
IBreefPublisher breefPublisher) : IBreefGenerator
{
public async Task<PublishedBreef> GenerateBreefAsync(string url)
{
var extract = await contentExtractor.ExtractAsync(url);
var summary = await contentSummariser.SummariseAsync(extract.Content);

var breef = new Domain.Breef(url, extract.Title ,summary, extract.PreviewImageUrl);
var instructions = instructionProvider.GetInstructions(extract.ExtractType);

var summary = await contentSummariser.SummariseAsync(extract.Content, instructions);

var breef = new Domain.Breef(url, extract.Title, summary, extract.PreviewImageUrl);

var publishedBreef = await breefPublisher.PublishAsync(breef);

Expand Down
2 changes: 1 addition & 1 deletion src/Elzik.Breef.Domain/Extract.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
namespace Elzik.Breef.Domain;

public record Extract(string Title, string Content, string? PreviewImageUrl);
public record Extract(string Title, string Content, string? PreviewImageUrl, string ExtractType);
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Elzik.Breef.Domain;

/// <summary>
/// Provides the summarisation instructions to use for a given type of extract.
/// </summary>
public interface IContentSummarisationInstructionProvider
{
/// <summary>
/// Gets the summarisation instructions for the named extract type.
/// </summary>
/// <param name="extractTypeName">Name of the extract type whose instructions are requested.</param>
/// <returns>The instructions as a string.</returns>
string GetInstructions(string extractTypeName);
}
2 changes: 1 addition & 1 deletion src/Elzik.Breef.Domain/IContentSummariser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
{
public interface IContentSummariser
{
Task<string> SummariseAsync(string content);
Task<string> SummariseAsync(string content, string instructions);
}
}
18 changes: 4 additions & 14 deletions src/Elzik.Breef.Infrastructure/AI/AiContentSummariser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,15 @@ namespace Elzik.Breef.Infrastructure.AI;

public class AiContentSummariser(
ILogger<AiContentSummariser> logger,
IChatCompletionService Chat,
IOptions<AiContentSummariserOptions> summariserOptions) : IContentSummariser
IChatCompletionService Chat) : IContentSummariser
{
public async Task<string> SummariseAsync(string content)
public async Task<string> SummariseAsync(string content, string instructions)
{
var systemPrompt = @$"
You are an expert summarizer. Your task is to summarize the provided text:
- Summarise text, including HTML entities.
- Limit summaries to {summariserOptions.Value.TargetSummaryLengthPercentage}% of the original length but never more then {summariserOptions.Value.TargetSummaryMaxWordCount} words.
- Ensure accurate attribution of information to the correct entities.
- Do not include a link to the original articles.
- Do not include the title in the response.
- Do not include any metadata in the response.
- Do not include a code block in the response.";

ArgumentNullException.ThrowIfNullOrWhiteSpace(instructions);
var formattingInstructions = "Summarise this content in an HTML format using paragraphs and " +
"bullet points to enhance readability\n:";

var chatHistory = new ChatHistory(systemPrompt);
var chatHistory = new ChatHistory(instructions);
chatHistory.AddMessage(AuthorRole.User, $"{formattingInstructions}{content}");

var result = await Chat.GetChatMessageContentAsync(chatHistory);
Expand Down
12 changes: 0 additions & 12 deletions src/Elzik.Breef.Infrastructure/AI/AiContentSummariserOptions.cs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using Elzik.Breef.Domain;
using Microsoft.Extensions.Logging;

namespace Elzik.Breef.Infrastructure.AI;

/// <summary>
/// Supplies summarisation instructions read from Markdown files on disk, one file per extract type.
/// </summary>
/// <remarks>
/// Every required instruction file is read once in the constructor and cached in memory, so a
/// missing or empty file is reported at construction time rather than on the first lookup.
/// Lookups by extract type name are case-insensitive.
/// </remarks>
public sealed class FileBasedContentSummarisationInstructionProvider : IContentSummarisationInstructionProvider
{
    private readonly Dictionary<string, string> _templatesByKey = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Loads the instruction file for each required extract type from the given directory.
    /// </summary>
    /// <param name="logger">Logger used to record each instruction file that is loaded.</param>
    /// <param name="instructionFileDirectoryPath">Directory containing the instruction files.</param>
    /// <param name="requiredExtractTypeNames">
    /// Extract type names to load; each must have a matching <c>{name}.md</c> file in the directory.
    /// </param>
    /// <exception cref="DirectoryNotFoundException">The instruction directory does not exist.</exception>
    /// <exception cref="ArgumentException"><paramref name="requiredExtractTypeNames"/> is null or empty.</exception>
    /// <exception cref="InvalidOperationException">A required instruction file is missing or contains only white space.</exception>
    public FileBasedContentSummarisationInstructionProvider(
        ILogger<FileBasedContentSummarisationInstructionProvider> logger,
        string instructionFileDirectoryPath,
        IEnumerable<string> requiredExtractTypeNames)
    {
        if (!Directory.Exists(instructionFileDirectoryPath))
        {
            throw new DirectoryNotFoundException($"Summarisation instructions directory not found at: {instructionFileDirectoryPath}");
        }

        // Snapshot the sequence once. Enumerating the caller's IEnumerable for both the emptiness
        // check and the load loop would run a lazy source twice and could see different items each time.
        var extractTypeNames = requiredExtractTypeNames?.ToList();

        if (extractTypeNames is null || extractTypeNames.Count == 0)
        {
            throw new ArgumentException("At least one required extract instruction must be specified.", nameof(requiredExtractTypeNames));
        }

        foreach (var extractTypeName in extractTypeNames)
        {
            var filePath = Path.Combine(instructionFileDirectoryPath, $"{extractTypeName}.md");

            if (!File.Exists(filePath))
            {
                throw new InvalidOperationException($"Missing summarisation instruction file: {filePath}");
            }

            var instructions = File.ReadAllText(filePath);

            if (string.IsNullOrWhiteSpace(instructions))
            {
                throw new InvalidOperationException($"Summarisation instruction file is empty: {filePath}");
            }

            // Indexer assignment means a repeated name (in any casing) keeps the last file read.
            _templatesByKey[extractTypeName] = instructions;
            logger.LogInformation("Loaded summarisation template for {Key} from {FilePath}", extractTypeName, filePath);
        }
    }

    /// <summary>
    /// Returns the cached instructions for the given extract type.
    /// </summary>
    /// <param name="extractTypeName">Extract type name to look up; matched case-insensitively.</param>
    /// <returns>The text of the instruction file loaded for that extract type.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="extractTypeName"/> is null.</exception>
    /// <exception cref="InvalidOperationException">No instructions were loaded for the extract type.</exception>
    public string GetInstructions(string extractTypeName)
    {
        // The dictionary lookup already rejects a null key; guarding here names the right parameter.
        ArgumentNullException.ThrowIfNull(extractTypeName);

        if (_templatesByKey.TryGetValue(extractTypeName, out var instructions))
        {
            return instructions;
        }

        throw new InvalidOperationException(
            $"No summarisation instructions found for content type '{extractTypeName}'.");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using Elzik.Breef.Domain;

namespace Elzik.Breef.Infrastructure.ContentExtractors;

/// <summary>
/// Base class for content extractors that stamps every extract with a type name derived
/// from the concrete extractor's class name.
/// </summary>
/// <remarks>
/// The extract type is the concrete class name with the trailing "Extractor" removed, which is
/// why the constructor rejects any subclass whose name does not end with that suffix.
/// </remarks>
public abstract class ContentExtractorBase : IContentExtractor
{
    private const string RequiredSuffix = "Extractor";

    /// <summary>
    /// Verifies that the concrete class follows the naming convention.
    /// </summary>
    /// <exception cref="InvalidOperationException">The concrete class name does not end with "Extractor".</exception>
    protected ContentExtractorBase()
    {
        var concreteName = GetType().Name;
        if (concreteName.EndsWith(RequiredSuffix, StringComparison.Ordinal))
        {
            return;
        }

        throw new InvalidOperationException(
            $"Content extractor class '{concreteName}' must end with '{RequiredSuffix}' suffix. " +
            "This convention is required to derive the ExtractType for domain objects.");
    }

    /// <summary>
    /// Indicates whether this extractor can handle the given URL.
    /// </summary>
    public abstract bool CanHandle(string webPageUrl);

    /// <summary>
    /// Runs the subclass extraction and attaches this extractor's type name to the result.
    /// </summary>
    /// <param name="webPageUrl">URL to extract content from.</param>
    /// <returns>The extract produced by the subclass, with its extract type set.</returns>
    /// <exception cref="InvalidOperationException">The subclass returned null.</exception>
    public async Task<Extract> ExtractAsync(string webPageUrl)
    {
        var untypedExtract = await CreateUntypedExtractAsync(webPageUrl);
        var derivedType = GetExtractType();

        return untypedExtract is not null
            ? untypedExtract.WithType(derivedType)
            : throw new InvalidOperationException(
                $"CreateUntypedExtractAsync returned null for URL '{webPageUrl}' " +
                $"in extractor type '{derivedType}'. A valid UntypedExtract must be returned.");
    }

    /// <summary>
    /// Produces the extract for the URL without a type; implemented by each concrete extractor.
    /// </summary>
    protected abstract Task<UntypedExtract> CreateUntypedExtractAsync(string webPageUrl);

    // Strips the required suffix from the concrete class name, e.g. "HtmlContentExtractor" -> "HtmlContent".
    private string GetExtractType() => GetType().Name[..^RequiredSuffix.Length];
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ public async Task<Extract> ExtractAsync(string webPageUrl)

_logger.LogInformation("Extraction will be provided for by {ExtractorName}", extractor.GetType().Name);

return await extractor.ExtractAsync(webPageUrl);
var extract = await extractor.ExtractAsync(webPageUrl);

_logger.LogInformation("Extraction of type {ExtractType} completed.", extract.ExtractType);

return extract;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@

namespace Elzik.Breef.Infrastructure.ContentExtractors;

public class HtmlContentExtractor(IHttpClientFactory httpClientFactory) : IContentExtractor
public class HtmlContentExtractor(IHttpClientFactory httpClientFactory) : ContentExtractorBase
{
public async Task<Extract> ExtractAsync(string webPageUrl)
public override bool CanHandle(string webPageUrl)
{
return true;
}

protected override async Task<UntypedExtract> CreateUntypedExtractAsync(string webPageUrl)
{
var httpClient = httpClientFactory.CreateClient("BreefDownloader");
var html = await httpClient.GetStringAsync(webPageUrl);
Expand All @@ -17,7 +22,7 @@ public async Task<Extract> ExtractAsync(string webPageUrl)
var largestImageUrl = GetLargestImageUrl(htmlDocument);


return new Extract(title, content, largestImageUrl);
return new UntypedExtract(title, content, largestImageUrl);
}

private static string GetContent(HtmlDocument htmlDocument)
Expand Down Expand Up @@ -79,6 +84,4 @@ private static string GetTitle(HtmlDocument htmlDocument, string defaultWhenMiss

return imageNodesSortedBySize.FirstOrDefault()?.ImageUrl;
}

public bool CanHandle(string webPageUrl) => true;
}
Loading