diff --git a/.gitignore b/.gitignore index 8a30d25..7590ead 100644 --- a/.gitignore +++ b/.gitignore @@ -396,3 +396,4 @@ FodyWeavers.xsd # JetBrains Rider *.sln.iml +/tests/LocalPlaylists diff --git a/Elzik.Breef.sln b/Elzik.Breef.sln index 3148a0b..bff2333 100644 --- a/Elzik.Breef.sln +++ b/Elzik.Breef.sln @@ -26,6 +26,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TestData", "TestData", "{7F ProjectSection(SolutionItems) = preProject tests\TestData\BbcNewsPage-ExpectedContent.txt = tests\TestData\BbcNewsPage-ExpectedContent.txt tests\TestData\BbcNewsPage.html = tests\TestData\BbcNewsPage.html + tests\TestData\SampleRedditPost-1kqiwzc.json = tests\TestData\SampleRedditPost-1kqiwzc.json tests\TestData\StaticTestPage.html = tests\TestData\StaticTestPage.html tests\TestData\TestHtmlPage-ExpectedContent.txt = tests\TestData\TestHtmlPage-ExpectedContent.txt tests\TestData\TestHtmlPage.html = tests\TestData\TestHtmlPage.html diff --git a/README.md b/README.md index 6ec9ac1..e4382b1 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,34 @@ Example ### Optional +#### Reddit + +These config items relate to the Reddit integration using the Options pattern with support for multiple Reddit instances. + +- **DefaultBaseAddress** - The primary base address for Reddit API requests. Default: `"https://www.reddit.com"`. Must be a valid URL. Used for Refit HTTP client configuration, fallback subreddit image extraction, and primary Reddit instance for content extraction. +- **AdditionalBaseAddresses** - Additional Reddit instances that the content extractors can handle. Default: `["https://reddit.com"]` (includes non-www variant by default). Domain matching is **exact** - if you want to support both `reddit.com` and `www.reddit.com`, you must explicitly configure both. +- **FallbackImageUrl** - The fallback image URL used when subreddit-specific images cannot be retrieved. Default: `"https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Lockup_Logo.svg"`. 
This URL is used as the default Reddit logo when no subreddit banner, icon, or community image is available. + +The Reddit integration allows extraction of content from: +- Custom Reddit instances +- Alternative Reddit domains +- Corporate or self-hosted Reddit installations +- Specific subdomains (e.g., `old.reddit.com`, `api.reddit.com`) + +**Domain Validation**: The content extractors validate URLs using **exact domain matching**. `reddit.com` does NOT automatically allow `www.reddit.com` - each domain variant must be explicitly configured. + +Example: + +```jsonc +"Reddit": { + "DefaultBaseAddress": "https://www.reddit.com", // breef_Reddit__DefaultBaseAddress + "AdditionalBaseAddresses": [ // breef_Reddit__AdditionalBaseAddresses__0 + "https://reddit.com", // breef_Reddit__AdditionalBaseAddresses__0 + ], + "FallbackImageUrl": "https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Lockup_Logo.svg" // breef_Reddit__FallbackImageUrl +} +``` + #### AiService - **TimeOut** - Sets the number of seconds before the AiService used will time out. The default used if not set is 100 seconds. This may need to be increased where Ollama is used with limiting hardware. @@ -112,13 +140,15 @@ Example: These settings affect how pages are downloaded prior to being summarised. - - **UserAgent** - The user agent used when downloading pages. By default this is set to `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36` but can be overridden here. +- **UserAgent** - The user agent used when downloading pages. By default this is set to `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36` but can be overridden here. +- **TimeoutSeconds** - The timeout in seconds for HTTP requests when downloading pages. By default this is set to `30` seconds but can be overridden here. Must be at least 1 second. 
Example: ```jsonc -"WebPageDownLoader" : { - "UserAgent": "" // breef_WebPageDownLoader__UserAgent +"HttpClient" : { + "UserAgent": "", // breef_HttpClient__UserAgent + "TimeoutSeconds": 30 // breef_HttpClient__TimeoutSeconds } ``` @@ -131,5 +161,4 @@ Logging is handled by Serilog and configuration is documented [here](https://git "MinimumLevel": { "Default": "Debug" // breef_Serilog__MinimumLevel__Default } -} -``` \ No newline at end of file +} \ No newline at end of file diff --git a/src/Elzik.Breef.Api/Elzik.Breef.Api.http b/src/Elzik.Breef.Api/Elzik.Breef.Api.http index 991b96a..f7e122f 100644 --- a/src/Elzik.Breef.Api/Elzik.Breef.Api.http +++ b/src/Elzik.Breef.Api/Elzik.Breef.Api.http @@ -4,5 +4,5 @@ Post {{Elzik.Breef.Api_HostAddress}}/breefs Content-Type: application/json BREEF-API-KEY: test-key { - "url":"https://www.bbc.co.uk/news/articles/cdedkr9439wo" + "url":"https://www.reddit.com/r/dotnet/comments/1o0j6or/im_giving_up_on_copilot_i_spend_more_time/" } diff --git a/src/Elzik.Breef.Api/Program.cs b/src/Elzik.Breef.Api/Program.cs index 785346e..d867754 100644 --- a/src/Elzik.Breef.Api/Program.cs +++ b/src/Elzik.Breef.Api/Program.cs @@ -4,15 +4,15 @@ using Elzik.Breef.Domain; using Elzik.Breef.Infrastructure; using Elzik.Breef.Infrastructure.AI; +using Elzik.Breef.Infrastructure.ContentExtractors; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; using Elzik.Breef.Infrastructure.Wallabag; using Microsoft.Extensions.Options; -using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.ChatCompletion; using Refit; using Serilog; using System.Reflection; -using System.Text.Json; -using System.Text.Json.Serialization; namespace Elzik.Breef.Api; @@ -61,13 +61,57 @@ public static async Task Main(string[] args) .ValidateOnStart(); builder.Services.AddAuth(); - builder.Services.AddOptions() - 
.Bind(configuration.GetSection("WebPageDownLoader")) + builder.Services.AddOptions() + .Bind(configuration.GetSection("HttpClient")) .ValidateDataAnnotations() .ValidateOnStart(); - builder.Services.AddTransient(); - builder.Services.AddTransient(); + builder.Services.AddHttpClient("BreefDownloader") + .ConfigureHttpClient((provider, client) => + { + var httpClientOptions = provider.GetRequiredService>().Value; + client.Timeout = TimeSpan.FromSeconds(httpClientOptions.TimeoutSeconds); + client.DefaultRequestHeaders.Add("User-Agent", httpClientOptions.UserAgent); + }); + + builder.Services.AddOptions() + .Bind(configuration.GetSection("Reddit")) + .ValidateDataAnnotations() + .ValidateOnStart(); + + builder.Services.AddRefitClient() + .ConfigureHttpClient((provider, client) => + { + var redditOptions = provider.GetRequiredService>().Value; + client.BaseAddress = new Uri(redditOptions.DefaultBaseAddress); + }); + + builder.Services.AddRefitClient() + .ConfigureHttpClient((provider, client) => + { + var redditOptions = provider.GetRequiredService>().Value; + client.BaseAddress = new Uri(redditOptions.DefaultBaseAddress); + }); + + builder.Services.AddTransient(); + builder.Services.AddTransient(); + builder.Services.AddTransient(); + builder.Services.AddTransient(); + + builder.Services.AddTransient(); + builder.Services.AddTransient(); + builder.Services.AddTransient(); + builder.Services.AddTransient(); + builder.Services.AddTransient(provider => + { + var logger = provider.GetRequiredService>(); + var defaultContentExtractor = provider.GetRequiredService(); + var subredditExtractor = provider.GetRequiredService(); + var redditPostExtractor = provider.GetRequiredService(); + return new ContentExtractorStrategy(logger, + [subredditExtractor, redditPostExtractor], + defaultContentExtractor); + }); builder.Services.AddOptions() .Bind(configuration.GetSection("AiService")) diff --git a/src/Elzik.Breef.Application/BreefGenerator.cs 
b/src/Elzik.Breef.Application/BreefGenerator.cs index 8c26663..6523145 100644 --- a/src/Elzik.Breef.Application/BreefGenerator.cs +++ b/src/Elzik.Breef.Application/BreefGenerator.cs @@ -1,5 +1,4 @@ using Elzik.Breef.Domain; -using System.Diagnostics; namespace Elzik.Breef.Application { diff --git a/src/Elzik.Breef.Application/Elzik.Breef.Application.csproj b/src/Elzik.Breef.Application/Elzik.Breef.Application.csproj index 9b45876..7d312e7 100644 --- a/src/Elzik.Breef.Application/Elzik.Breef.Application.csproj +++ b/src/Elzik.Breef.Application/Elzik.Breef.Application.csproj @@ -6,6 +6,13 @@ enable + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + diff --git a/src/Elzik.Breef.Domain/Elzik.Breef.Domain.csproj b/src/Elzik.Breef.Domain/Elzik.Breef.Domain.csproj index ac3c611..9d47793 100644 --- a/src/Elzik.Breef.Domain/Elzik.Breef.Domain.csproj +++ b/src/Elzik.Breef.Domain/Elzik.Breef.Domain.csproj @@ -1,4 +1,4 @@ - + net8.0 @@ -9,6 +9,10 @@ + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + diff --git a/src/Elzik.Breef.Domain/IContentExtractor.cs b/src/Elzik.Breef.Domain/IContentExtractor.cs index 2b0a89d..fe0e2eb 100644 --- a/src/Elzik.Breef.Domain/IContentExtractor.cs +++ b/src/Elzik.Breef.Domain/IContentExtractor.cs @@ -2,6 +2,8 @@ { public interface IContentExtractor { + bool CanHandle(string webPageUrl); + Task ExtractAsync(string webPageUrl); } } diff --git a/src/Elzik.Breef.Domain/IWebPageDownloader.cs b/src/Elzik.Breef.Domain/IWebPageDownloader.cs deleted file mode 100644 index 3683382..0000000 --- a/src/Elzik.Breef.Domain/IWebPageDownloader.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace Elzik.Breef.Domain -{ - public interface IWebPageDownloader - { - Task DownloadAsync(string url); - } -} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/ContentExtractorStrategy.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/ContentExtractorStrategy.cs new file mode 100644 
index 0000000..81ef0ee --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/ContentExtractorStrategy.cs @@ -0,0 +1,38 @@ +using Elzik.Breef.Domain; +using Microsoft.Extensions.Logging; + +namespace Elzik.Breef.Infrastructure.ContentExtractors +{ + public class ContentExtractorStrategy : IContentExtractor + { + private readonly ILogger _logger; + private readonly List _extractors; + + public ContentExtractorStrategy(ILogger logger, + IEnumerable specificExtractors, IContentExtractor defaultExtractor) + { + ArgumentNullException.ThrowIfNull(logger); + ArgumentNullException.ThrowIfNull(specificExtractors); + ArgumentNullException.ThrowIfNull(defaultExtractor); + + _logger = logger; + + if (specificExtractors.Contains(defaultExtractor)) + throw new ArgumentException("Default extractor should not be in the specific extractors list."); + + _extractors = [.. specificExtractors, defaultExtractor]; + } + + public bool CanHandle(string webPageUrl) => true; + + public async Task ExtractAsync(string webPageUrl) + { + var extractor = _extractors.First(e => e.CanHandle(webPageUrl)); + + _logger.LogInformation("Extraction will be provided for by {ExtractorName}", extractor.GetType().Name); + + return await extractor.ExtractAsync(webPageUrl); + } + } + +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractor.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/HtmlContentExtractor.cs similarity index 88% rename from src/Elzik.Breef.Infrastructure/ContentExtractor.cs rename to src/Elzik.Breef.Infrastructure/ContentExtractors/HtmlContentExtractor.cs index 0694c54..4bccaf3 100644 --- a/src/Elzik.Breef.Infrastructure/ContentExtractor.cs +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/HtmlContentExtractor.cs @@ -1,13 +1,14 @@ using Elzik.Breef.Domain; using HtmlAgilityPack; -namespace Elzik.Breef.Infrastructure; +namespace Elzik.Breef.Infrastructure.ContentExtractors; -public class ContentExtractor(IWebPageDownloader httpClient) : IContentExtractor 
+public class HtmlContentExtractor(IHttpClientFactory httpClientFactory) : IContentExtractor { public async Task ExtractAsync(string webPageUrl) { - var html = await httpClient.DownloadAsync(webPageUrl); + var httpClient = httpClientFactory.CreateClient("BreefDownloader"); + var html = await httpClient.GetStringAsync(webPageUrl); var htmlDocument = new HtmlDocument(); htmlDocument.LoadHtml(html); @@ -78,4 +79,6 @@ private static string GetTitle(HtmlDocument htmlDocument, string defaultWhenMiss return imageNodesSortedBySize.FirstOrDefault()?.ImageUrl; } + + public bool CanHandle(string webPageUrl) => true; } diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/AboutSubreddit.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/AboutSubreddit.cs new file mode 100644 index 0000000..640550b --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/AboutSubreddit.cs @@ -0,0 +1,30 @@ +using System.Text.Json.Serialization; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class AboutSubreddit +{ + [JsonPropertyName("data")] + public AboutSubredditData? Data { get; set; } +} + +public class AboutSubredditData +{ + [JsonPropertyName("public_description")] + public string? PublicDescription { get; set; } + + [JsonPropertyName("icon_img")] + public string? IconImg { get; set; } + + [JsonPropertyName("banner_img")] + public string? BannerImg { get; set; } + + [JsonPropertyName("banner_background_image")] + public string? BannerBackgroundImage { get; set; } + + [JsonPropertyName("mobile_banner_image")] + public string? MobileBannerImage { get; set; } + + [JsonPropertyName("community_icon")] + public string? 
CommunityIcon { get; set; } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/IRawNewInSubredditTransformer.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/IRawNewInSubredditTransformer.cs new file mode 100644 index 0000000..c87f953 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/IRawNewInSubredditTransformer.cs @@ -0,0 +1,6 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public interface IRawNewInSubredditTransformer +{ + Task Transform(RawNewInSubreddit rawNewInSubreddit); +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/IRedditPostClient.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/IRedditPostClient.cs new file mode 100644 index 0000000..5fe4ffd --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/IRedditPostClient.cs @@ -0,0 +1,7 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client +{ + public interface IRedditPostClient + { + Task GetPost(string postId); + } +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/ISubredditClient.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/ISubredditClient.cs new file mode 100644 index 0000000..658af65 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/ISubredditClient.cs @@ -0,0 +1,6 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public interface ISubredditClient +{ + Task GetNewInSubreddit(string subRedditName); +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/NewInSubreddit.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/NewInSubreddit.cs new file mode 100644 index 0000000..770dda2 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/NewInSubreddit.cs @@ -0,0 +1,6 @@ 
+namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class NewInSubreddit +{ + public List Posts { get; set; } = []; +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/FlexibleStringConverter.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/FlexibleStringConverter.cs new file mode 100644 index 0000000..51df507 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/FlexibleStringConverter.cs @@ -0,0 +1,30 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +public class FlexibleStringConverter : JsonConverter +{ + public override string? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + return reader.TokenType switch + { + JsonTokenType.String => reader.GetString(), + JsonTokenType.Number => reader.GetInt64().ToString(), + JsonTokenType.Null => null, + _ => throw new JsonException($"Cannot convert {reader.TokenType} to string") + }; + } + + public override void Write(Utf8JsonWriter writer, string? 
value, JsonSerializerOptions options) + { + if (value == null) + { + writer.WriteNullValue(); + } + else + { + writer.WriteStringValue(value); + } + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawRedditPostClient.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawRedditPostClient.cs new file mode 100644 index 0000000..d47402a --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawRedditPostClient.cs @@ -0,0 +1,11 @@ +using Refit; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw +{ + public interface IRawRedditPostClient + { + [Get("/comments/{postId}.json")] + [Headers("User-Agent: breef/1.0.0 (https://github.com/elzik/breef)")] + Task GetPost(string postId); + } +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawRedditPostTransformer.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawRedditPostTransformer.cs new file mode 100644 index 0000000..c792241 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawRedditPostTransformer.cs @@ -0,0 +1,6 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +public interface IRawRedditPostTransformer +{ + RedditPost Transform(RawRedditPost rawRedditPost); +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawSubredditClient.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawSubredditClient.cs new file mode 100644 index 0000000..ffbd9c2 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/IRawSubredditClient.cs @@ -0,0 +1,14 @@ +using Refit; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +public interface IRawSubredditClient +{ + [Get("/r/{subRedditName}/new.json")] + [Headers("User-Agent: breef/1.0.0 
(https://github.com/elzik/breef)")] + Task GetNewInSubreddit(string subRedditName); + + [Get("/r/{subRedditName}/about.json")] + [Headers("User-Agent: breef/1.0.0 (https://github.com/elzik/breef)")] + Task GetAboutSubreddit(string subRedditName); +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RawRedditPost.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RawRedditPost.cs new file mode 100644 index 0000000..36273e2 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RawRedditPost.cs @@ -0,0 +1,154 @@ +using System.Text.Json.Serialization; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +public class RawRedditPost : List +{ +} + +public class RawRedditListing +{ + [JsonPropertyName("kind")] + public string? Kind { get; set; } + + [JsonPropertyName("data")] + public RawRedditListingData Data { get; set; } = new(); +} + +public class RawRedditListingData +{ + [JsonPropertyName("after")] + public string? After { get; set; } + + [JsonPropertyName("before")] + public string? Before { get; set; } + + [JsonPropertyName("children")] + public List Children { get; set; } = []; +} + +public class RawRedditChild +{ + [JsonPropertyName("kind")] + public string? Kind { get; set; } + + [JsonPropertyName("data")] + public RawRedditCommentData Data { get; set; } = new(); +} + +public class RawRedditCommentData +{ + [JsonPropertyName("id")] + [JsonConverter(typeof(FlexibleStringConverter))] + public string? Id { get; set; } + + [JsonPropertyName("author")] + public string? Author { get; set; } + + [JsonPropertyName("body")] + public string? Body { get; set; } + + [JsonPropertyName("selftext")] + public string? SelfText { get; set; } + + [JsonPropertyName("title")] + public string? Title { get; set; } + + [JsonPropertyName("score")] + public int Score { get; set; } + + [JsonPropertyName("subreddit")] + public string? 
Subreddit { get; set; } + + [JsonPropertyName("created_utc")] + [JsonConverter(typeof(RedditDateTimeConverter))] + public DateTime CreatedUtc { get; set; } + + [JsonPropertyName("replies")] + public object? Replies { get; set; } // Use object to handle both RawRedditListing and empty string cases + + [JsonPropertyName("url")] + public string? Url { get; set; } + + [JsonPropertyName("url_overridden_by_dest")] + public string? UrlOverriddenByDest { get; set; } + + [JsonPropertyName("thumbnail")] + public string? Thumbnail { get; set; } + + [JsonPropertyName("preview")] + public RawRedditPreview? Preview { get; set; } + + [JsonPropertyName("is_gallery")] + public bool IsGallery { get; set; } + + [JsonPropertyName("media_metadata")] + public Dictionary? MediaMetadata { get; set; } + + [JsonPropertyName("gallery_data")] + public RawRedditGalleryData? GalleryData { get; set; } + + [JsonIgnore] + public string? Content => Body ?? SelfText; +} + +public class RawRedditPreview +{ + [JsonPropertyName("images")] + public List? Images { get; set; } + + [JsonPropertyName("enabled")] + public bool Enabled { get; set; } +} + +public class RawRedditPreviewImage +{ + [JsonPropertyName("source")] + public RawRedditImageSource? Source { get; set; } + + [JsonPropertyName("resolutions")] + public List? Resolutions { get; set; } +} + +public class RawRedditImageSource +{ + [JsonPropertyName("url")] + public string? Url { get; set; } + + [JsonPropertyName("width")] + public int Width { get; set; } + + [JsonPropertyName("height")] + public int Height { get; set; } +} + +public class RawRedditMediaMetadata +{ + [JsonPropertyName("s")] + public RawRedditImageSource? Source { get; set; } + + [JsonPropertyName("status")] + public string? Status { get; set; } + + [JsonPropertyName("e")] + public string? Extension { get; set; } + + [JsonPropertyName("m")] + public string? MimeType { get; set; } +} + +public class RawRedditGalleryData +{ + [JsonPropertyName("items")] + public List? 
Items { get; set; } +} + +public class RawRedditGalleryItem +{ + [JsonPropertyName("media_id")] + public string? MediaId { get; set; } + + [JsonPropertyName("id")] + [JsonConverter(typeof(FlexibleStringConverter))] + public string? Id { get; set; } +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RawRedditPostTransformer.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RawRedditPostTransformer.cs new file mode 100644 index 0000000..ec30a74 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RawRedditPostTransformer.cs @@ -0,0 +1,178 @@ +using System.Text.Json; +using System.Web; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +public class RawRedditPostTransformer : IRawRedditPostTransformer +{ + public RedditPost Transform(RawRedditPost rawRedditPost) + { + ArgumentNullException.ThrowIfNull(rawRedditPost); + if (rawRedditPost.Count < 2) + throw new ArgumentException("Reddit post must have at least 2 listings (post and comments)", nameof(rawRedditPost)); + + var postListing = rawRedditPost[0]; + var commentsListing = rawRedditPost[1]; + + var postChildren = postListing.Data?.Children; + if (postChildren == null || postChildren.Count == 0) + throw new ArgumentException("Post listing must contain at least one child", nameof(rawRedditPost)); + + var mainPostData = postChildren[0].Data; + var bestImage = ExtractBestImage(mainPostData); + + var redditPost = new RedditPost + { + Post = new RedditPostContent + { + Id = mainPostData.Id ?? string.Empty, + Title = mainPostData.Title ?? throw new InvalidOperationException("Reddit post must have a title"), + Author = mainPostData.Author ?? string.Empty, + Subreddit = mainPostData.Subreddit ?? string.Empty, + Score = mainPostData.Score, + Content = mainPostData.Content ?? 
string.Empty, + CreatedUtc = mainPostData.CreatedUtc, + ImageUrl = bestImage + }, + Comments = TransformComments(commentsListing) + }; + + return redditPost; + } + + private static string? ExtractBestImage(RawRedditCommentData postData) + { + // 1. Gallery images (highest priority) - pick the first/largest + if (postData.IsGallery && postData.GalleryData?.Items != null && postData.MediaMetadata != null) + { + var bestGalleryImage = postData.GalleryData.Items + .Where(item => item.MediaId != null && postData.MediaMetadata.ContainsKey(item.MediaId)) + .Select(item => postData.MediaMetadata[item.MediaId!]) + .Where(metadata => metadata.Status == "valid" && metadata.Source?.Url != null) + .OrderByDescending(metadata => metadata.Source!.Width * metadata.Source.Height) + .FirstOrDefault(); + + if (bestGalleryImage?.Source?.Url != null) + { + return HttpUtility.HtmlDecode(bestGalleryImage.Source.Url); + } + } + + // 2. Preview images (high priority) - pick the largest + if (postData.Preview?.Images != null) + { + var bestPreviewImage = postData.Preview.Images + .Where(img => img.Source?.Url != null) + .OrderByDescending(img => img.Source!.Width * img.Source.Height) + .FirstOrDefault(); + + if (bestPreviewImage?.Source?.Url != null) + { + return HttpUtility.HtmlDecode(bestPreviewImage.Source.Url); + } + } + + // 3. Direct image URL + var directUrl = postData.UrlOverriddenByDest ?? postData.Url; + if (IsImageUrl(directUrl)) + { + return directUrl; + } + + // 4. Thumbnail (last resort) + if (!string.IsNullOrEmpty(postData.Thumbnail) && + postData.Thumbnail != "self" && + postData.Thumbnail != "default" && + postData.Thumbnail != "nsfw" && + IsImageUrl(postData.Thumbnail)) + { + return postData.Thumbnail; + } + + return null; + } + + private static bool IsImageUrl(string? 
url) + { + if (string.IsNullOrEmpty(url)) + return false; + + if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) + return false; + + var extension = Path.GetExtension(uri.AbsolutePath).ToLowerInvariant(); + return extension is ".jpg" or ".jpeg" or ".png" or ".gif" or ".webp" or ".bmp" or ".svg"; + } + + private List TransformComments(List children) + { + var comments = new List(); + + foreach (var child in children) + { + if (child.Kind == "t1") + { + var comment = new RedditComment + { + Id = child.Data.Id ?? string.Empty, + Author = child.Data.Author ?? string.Empty, + Score = child.Data.Score, + Content = child.Data.Content ?? string.Empty, + CreatedUtc = child.Data.CreatedUtc, + Replies = TransformComments(child.Data.Replies) + }; + + comments.Add(comment); + } + } + + return comments; + } + + private List TransformComments(object? replies) + { + if (replies == null) + return []; + + if (replies is string stringReply && stringReply == "") + return []; + + if (replies is JsonElement jsonElement) + { + if (jsonElement.ValueKind == JsonValueKind.Null) + return []; + + if (jsonElement.ValueKind == JsonValueKind.String && jsonElement.GetString() == "") + return []; + + try + { + var deserializedListing = JsonSerializer.Deserialize(jsonElement.GetRawText()); + return TransformComments(deserializedListing); + } + catch + { + return []; + } + } + + if (replies is RawRedditListing listing) + return TransformComments(listing); + + return []; + } + + private List TransformComments(RawRedditListing? 
replies) + { + if (replies == null) + return []; + + if (replies.Data == null) + return []; + + if (replies.Data.Children == null) + return []; + + return TransformComments(replies.Data.Children); + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RedditDateTimeConverter.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RedditDateTimeConverter.cs new file mode 100644 index 0000000..2ff4fd9 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/Raw/RedditDateTimeConverter.cs @@ -0,0 +1,36 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw +{ + public class RedditDateTimeConverter : JsonConverter + { + public override DateTime Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + if (reader.TokenType == JsonTokenType.Null) + return default; + + if (reader.TokenType == JsonTokenType.Number && reader.TryGetDouble(out double doubleSeconds)) + { + return DateTimeOffset.FromUnixTimeSeconds((long)doubleSeconds).UtcDateTime; + } + + throw new JsonException("Invalid Unix timestamp for DateTime."); + } + + public override void Write(Utf8JsonWriter writer, DateTime value, JsonSerializerOptions options) + { + var utc = value.Kind switch + { + DateTimeKind.Utc => value, + DateTimeKind.Local => value.ToUniversalTime(), + DateTimeKind.Unspecified => DateTime.SpecifyKind(value, DateTimeKind.Utc), + _ => value + }; + + writer.WriteNumberValue(new DateTimeOffset(utc).ToUnixTimeSeconds()); + } + + + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RawNewInSubreddit.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RawNewInSubreddit.cs new file mode 100644 index 0000000..bbb105a --- /dev/null +++ 
b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RawNewInSubreddit.cs @@ -0,0 +1,39 @@ +using System.Text.Json.Serialization; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class RawNewInSubreddit +{ + [JsonPropertyName("data")] + public RawListingData? Data { get; set; } +} + +public class RawListingData +{ + [JsonPropertyName("children")] + public List? Children { get; set; } +} + +public class RawChild +{ + [JsonPropertyName("data")] + public RawPostData? Data { get; set; } +} + +public class RawPostData +{ + [JsonPropertyName("title")] + public string? Title { get; set; } + + [JsonPropertyName("selftext")] + public string? SelfText { get; set; } + + [JsonPropertyName("author")] + public string? Author { get; set; } + + [JsonPropertyName("id")] + public string? Id { get; set; } + + [JsonPropertyName("url")] + public string? Url { get; set; } +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RawNewInSubredditTransformer.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RawNewInSubredditTransformer.cs new file mode 100644 index 0000000..dabab57 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RawNewInSubredditTransformer.cs @@ -0,0 +1,28 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class RawNewInSubredditTransformer(IRedditPostClient redditPostClient) : IRawNewInSubredditTransformer +{ + public async Task Transform(RawNewInSubreddit rawNewInSubreddit) + { + ArgumentNullException.ThrowIfNull(rawNewInSubreddit); + + var newInSubreddit = new NewInSubreddit(); + + if (rawNewInSubreddit.Data?.Children == null || rawNewInSubreddit.Data.Children.Count == 0) + { + return newInSubreddit; + } + + var postIds = rawNewInSubreddit.Data.Children + .Where(child => child.Data?.Id != null) + .Select(child => child.Data!.Id!) 
+ .ToList(); + + var postTasks = postIds.Select(id => redditPostClient.GetPost(id)); + var posts = await Task.WhenAll(postTasks); + + newInSubreddit.Posts.AddRange(posts); + + return newInSubreddit; + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RedditPost.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RedditPost.cs new file mode 100644 index 0000000..b9815cf --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RedditPost.cs @@ -0,0 +1,29 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class RedditPost +{ + public RedditPostContent Post { get; set; } = new(); + public List Comments { get; set; } = []; +} + +public class RedditPostContent +{ + public string Id { get; set; } = string.Empty; + public string Title { get; set; } = string.Empty; + public string Author { get; set; } = string.Empty; + public string Subreddit { get; set; } = string.Empty; + public int Score { get; set; } + public string Content { get; set; } = string.Empty; + public DateTime CreatedUtc { get; set; } + public string? 
ImageUrl { get; set; } +} + +public class RedditComment +{ + public string Id { get; set; } = string.Empty; + public string Author { get; set; } = string.Empty; + public int Score { get; set; } + public string Content { get; set; } = string.Empty; + public DateTime CreatedUtc { get; set; } + public List<RedditComment> Replies { get; set; } = []; +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RedditPostClient.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RedditPostClient.cs new file mode 100644 index 0000000..f1cbe6d --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/RedditPostClient.cs @@ -0,0 +1,12 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class RedditPostClient(IRawRedditPostClient redditPostClient, IRawRedditPostTransformer transformer) : IRedditPostClient +{ + public async Task<RedditPost> GetPost(string postId) + { + var redditPost = await redditPostClient.GetPost(postId); + return transformer.Transform(redditPost); + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/SubredditClient.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/SubredditClient.cs new file mode 100644 index 0000000..ebb1a69 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/Client/SubredditClient.cs @@ -0,0 +1,13 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; + +public class SubredditClient(IRawSubredditClient rawSubredditClient, IRawNewInSubredditTransformer transformer) : ISubredditClient +{ + public async Task<NewInSubreddit> GetNewInSubreddit(string subRedditName) + { + var rawNewInSubreddit = await rawSubredditClient.GetNewInSubreddit(subRedditName); + + return await 
transformer.Transform(rawNewInSubreddit); + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/ISubredditImageExtractor.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/ISubredditImageExtractor.cs new file mode 100644 index 0000000..93d2cc2 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/ISubredditImageExtractor.cs @@ -0,0 +1,6 @@ +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit; + +public interface ISubredditImageExtractor +{ + Task<string?> GetSubredditImageUrlAsync(string subredditName); +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/RedditOptions.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/RedditOptions.cs new file mode 100644 index 0000000..deae003 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/RedditOptions.cs @@ -0,0 +1,33 @@ +using System.ComponentModel.DataAnnotations; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit; + +public class RedditOptions +{ + [Required] + [Url] + public string DefaultBaseAddress { get; set; } = "https://www.reddit.com"; + + public List<string> AdditionalBaseAddresses { get; set; } = []; + + public string FallbackImageUrl { get; set; } = "https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Lockup_Logo.svg"; + + public IEnumerable<string> AllBaseAddresses => + new[] { DefaultBaseAddress }.Concat(GetEffectiveAdditionalBaseAddresses()); + + public IEnumerable<string> AllDomains => + AllBaseAddresses + .Select(url => Uri.TryCreate(url, UriKind.Absolute, out var uri) ? 
uri : null) + .Where(uri => uri != null) + .Select(uri => uri!.Host); + + private List GetEffectiveAdditionalBaseAddresses() + { + if (AdditionalBaseAddresses.Count == 0) + { + return ["https://reddit.com"]; + } + + return AdditionalBaseAddresses; + } +} \ No newline at end of file diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/RedditPostContentExtractor.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/RedditPostContentExtractor.cs new file mode 100644 index 0000000..e46a38d --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/RedditPostContentExtractor.cs @@ -0,0 +1,74 @@ +using Elzik.Breef.Domain; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Microsoft.Extensions.Options; +using System.Text.Json; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit; + +public class RedditPostContentExtractor( + IRedditPostClient redditPostClient, + ISubredditImageExtractor subredditImageExtractor, + IOptions redditOptions) : IContentExtractor +{ + private readonly RedditOptions _redditOptions = redditOptions.Value; + + public bool CanHandle(string webPageUrl) + { + if (!Uri.TryCreate(webPageUrl, UriKind.Absolute, out Uri? webPageUri)) + return false; + + var requestDomain = webPageUri.Host; + + if (!_redditOptions.AllDomains.Any(allowedDomain => + requestDomain.Equals(allowedDomain, StringComparison.OrdinalIgnoreCase))) + return false; + + var segments = webPageUri.AbsolutePath.Trim('/').Split('/'); + + return + (segments.Length == 4 || segments.Length == 5) && + segments[0].Equals("r", StringComparison.OrdinalIgnoreCase) && + segments[2].Equals("comments", StringComparison.OrdinalIgnoreCase); + } + + public async Task ExtractAsync(string webPageUrl) + { + if (!Uri.TryCreate(webPageUrl, UriKind.Absolute, out Uri? webPageUri)) + throw new InvalidOperationException($"Invalid URL format: '{webPageUrl}'. 
" + + $"URL must be a valid absolute URI."); + + var requestDomain = webPageUri.Host; + + if (!_redditOptions.AllDomains.Any(allowedDomain => + requestDomain.Equals(allowedDomain, StringComparison.OrdinalIgnoreCase))) + { + var supportedDomains = string.Join(", ", _redditOptions.AllDomains); + throw new InvalidOperationException($"Unsupported domain: '{requestDomain}'. " + + $"Supported domains: {supportedDomains}"); + } + + var segments = webPageUri.AbsolutePath.Trim('/').Split('/'); + + if (!((segments.Length == 4 || segments.Length == 5) && + segments[0].Equals("r", StringComparison.OrdinalIgnoreCase) && + segments[2].Equals("comments", StringComparison.OrdinalIgnoreCase))) + { + throw new InvalidOperationException($"Unsupported Reddit URL format: '{webPageUrl}'. " + + $"Expected format: 'https://[reddit-domain]/r/[subreddit]/comments/[postId]' " + + $"or 'https://[reddit-domain]/r/[subreddit]/comments/[postId]/[title]'."); + } + + var postId = segments[3]; + var post = await redditPostClient.GetPost(postId); + + if (string.IsNullOrWhiteSpace(post.Post.ImageUrl)) + { + var subredditName = segments[1]; + post.Post.ImageUrl = await subredditImageExtractor.GetSubredditImageUrlAsync(subredditName); + } + + var postJson = JsonSerializer.Serialize(post); + + return new Extract(post.Post.Title, postJson, post.Post.ImageUrl); + } +} diff --git a/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/SubRedditContentExtractor.cs b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/SubRedditContentExtractor.cs new file mode 100644 index 0000000..14ac60a --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/ContentExtractors/Reddit/SubRedditContentExtractor.cs @@ -0,0 +1,85 @@ +using Elzik.Breef.Domain; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Microsoft.Extensions.Options; +using System.Text.Json; + +namespace Elzik.Breef.Infrastructure.ContentExtractors.Reddit; + +public class SubredditContentExtractor + (ISubredditClient 
subredditClient, IHttpClientFactory httpClientFactory, IOptions redditOptions) + : IContentExtractor, ISubredditImageExtractor +{ + private const char UrlPathSeparator = '/'; + private readonly IHttpClientFactory _httpClientFactory = httpClientFactory; + private readonly RedditOptions _redditOptions = redditOptions.Value; + + public bool CanHandle(string webPageUrl) + { + if (!Uri.TryCreate(webPageUrl, UriKind.Absolute, out Uri? webPageUri)) + return false; + + var requestDomain = webPageUri.Host; + + if (!_redditOptions.AllDomains.Any(allowedDomain => + requestDomain.Equals(allowedDomain, StringComparison.OrdinalIgnoreCase))) + return false; + + var segments = webPageUri.AbsolutePath.Trim(UrlPathSeparator).Split(UrlPathSeparator); + + return + segments.Length == 2 && + segments[0].Equals("r", StringComparison.OrdinalIgnoreCase); + } + + public async Task ExtractAsync(string webPageUrl) + { + var webPageUri = new Uri(webPageUrl.EndsWith(UrlPathSeparator) ? webPageUrl : webPageUrl + UrlPathSeparator, UriKind.Absolute); + var webPageParts = webPageUri.AbsolutePath.Trim(UrlPathSeparator).Split(UrlPathSeparator); + var subredditName = webPageParts[^1]; + + var newInSubreddit = await subredditClient.GetNewInSubreddit(subredditName); + var jsonContent = JsonSerializer.Serialize(newInSubreddit); + var imageUrl = await ExtractImageUrlAsync(webPageUri); + + return new Extract($"New in r/{subredditName}", jsonContent, imageUrl); + } + + public async Task GetSubredditImageUrlAsync(string subredditName) + { + var subRedditBaseUri = new Uri($"{_redditOptions.DefaultBaseAddress}/r/{subredditName}/"); + return await ExtractImageUrlAsync(subRedditBaseUri); + } + + private async Task ExtractImageUrlAsync(Uri subRedditBaseUri) + { + Uri subRedditAboutUri = new(subRedditBaseUri, "about.json"); + var httpClient = _httpClientFactory.CreateClient("BreefDownloader"); + var jsonContent = await httpClient.GetStringAsync(subRedditAboutUri.AbsoluteUri); + + string[] imageKeys = 
["banner_background_image", "banner_img", "mobile_banner_image", "icon_img", "community_icon"]; + + using var doc = JsonDocument.Parse(jsonContent); + var data = doc.RootElement.GetProperty("data"); + + foreach (var imageKey in imageKeys) + { + if (data.TryGetProperty(imageKey, out var prop)) + { + var imageUrl = prop.GetString(); + if (!string.IsNullOrWhiteSpace(imageUrl) && + Uri.TryCreate(imageUrl, UriKind.Absolute, out var uri) && + (uri.Scheme == "http" || uri.Scheme == "https")) + { + var client = _httpClientFactory.CreateClient("BreefDownloader"); + using var response = await client.GetAsync(imageUrl); + if (response.IsSuccessStatusCode) + { + return imageUrl; + } + } + } + } + + return _redditOptions.FallbackImageUrl; + } +} diff --git a/src/Elzik.Breef.Infrastructure/Elzik.Breef.Infrastructure.csproj b/src/Elzik.Breef.Infrastructure/Elzik.Breef.Infrastructure.csproj index 574ec4a..31c3e39 100644 --- a/src/Elzik.Breef.Infrastructure/Elzik.Breef.Infrastructure.csproj +++ b/src/Elzik.Breef.Infrastructure/Elzik.Breef.Infrastructure.csproj @@ -1,4 +1,4 @@ - + net8.0 @@ -15,6 +15,10 @@ + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + diff --git a/src/Elzik.Breef.Infrastructure/HttpClientOptions.cs b/src/Elzik.Breef.Infrastructure/HttpClientOptions.cs new file mode 100644 index 0000000..f2531e4 --- /dev/null +++ b/src/Elzik.Breef.Infrastructure/HttpClientOptions.cs @@ -0,0 +1,14 @@ +using System.ComponentModel.DataAnnotations; + +namespace Elzik.Breef.Infrastructure; + +public class HttpClientOptions +{ + [Required] + public string UserAgent { get; set; } = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + + "AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/110.0.0.0 Safari/537.36"; + + [Range(1, int.MaxValue)] + public int TimeoutSeconds { get; set; } = 30; +} diff --git a/src/Elzik.Breef.Infrastructure/Wallabag/TokenResponse.cs b/src/Elzik.Breef.Infrastructure/Wallabag/TokenResponse.cs index 95a0ea2..b1d1895 100644 --- 
a/src/Elzik.Breef.Infrastructure/Wallabag/TokenResponse.cs +++ b/src/Elzik.Breef.Infrastructure/Wallabag/TokenResponse.cs @@ -1,5 +1,4 @@ -using Refit; -using System.Text.Json.Serialization; +using System.Text.Json.Serialization; namespace Elzik.Breef.Infrastructure.Wallabag { diff --git a/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntry.cs b/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntry.cs index 2043b90..8f6a868 100644 --- a/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntry.cs +++ b/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntry.cs @@ -1,6 +1,3 @@ -using Refit; -using System; -using System.Collections.Generic; using System.Text.Json.Serialization; namespace Elzik.Breef.Infrastructure.Wallabag diff --git a/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntryCreateRequest.cs b/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntryCreateRequest.cs index 6d9e4cb..f1d030d 100644 --- a/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntryCreateRequest.cs +++ b/src/Elzik.Breef.Infrastructure/Wallabag/WallabagEntryCreateRequest.cs @@ -1,5 +1,4 @@ -using Refit; -using System.Text.Json.Serialization; +using System.Text.Json.Serialization; namespace Elzik.Breef.Infrastructure.Wallabag { diff --git a/src/Elzik.Breef.Infrastructure/WebPageDownLoaderOptions.cs b/src/Elzik.Breef.Infrastructure/WebPageDownLoaderOptions.cs deleted file mode 100644 index f09f9ab..0000000 --- a/src/Elzik.Breef.Infrastructure/WebPageDownLoaderOptions.cs +++ /dev/null @@ -1,11 +0,0 @@ -using System.ComponentModel.DataAnnotations; - -namespace Elzik.Breef.Infrastructure; - -public class WebPageDownLoaderOptions -{ - [Required] - public string UserAgent { get; set; } = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + - "AppleWebKit/537.36 (KHTML, like Gecko) " + - "Chrome/110.0.0.0 Safari/537.36"; -} diff --git a/src/Elzik.Breef.Infrastructure/WebPageDownloader.cs b/src/Elzik.Breef.Infrastructure/WebPageDownloader.cs deleted file mode 100644 index 00a6fb4..0000000 --- 
a/src/Elzik.Breef.Infrastructure/WebPageDownloader.cs +++ /dev/null @@ -1,31 +0,0 @@ -using Elzik.Breef.Domain; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Options; - -namespace Elzik.Breef.Infrastructure -{ - public sealed class WebPageDownloader : IWebPageDownloader, IDisposable - { - private readonly HttpClient _httpClient; - - public WebPageDownloader(ILogger logger, - IOptions WebPageDownLoaderOptions) - { - _httpClient = new HttpClient(); - _httpClient.DefaultRequestHeaders.Add("User-Agent", WebPageDownLoaderOptions.Value.UserAgent); - - logger.LogInformation("Downloads will be made using the User-Agent: {UserAgent}", - _httpClient.DefaultRequestHeaders.UserAgent); - } - - public async Task DownloadAsync(string url) - { - return await _httpClient.GetStringAsync(url); - } - - public void Dispose() - { - _httpClient.Dispose(); - } - } -} diff --git a/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsBase.cs b/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsBase.cs index 93bece5..a14e9c6 100644 --- a/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsBase.cs +++ b/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsBase.cs @@ -1,5 +1,4 @@ using Elzik.Breef.Api.Presentation; -using Elzik.Breef.Domain; using Elzik.Breef.Infrastructure.Wallabag; using Microsoft.Extensions.Configuration; using Shouldly; diff --git a/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsDocker.cs b/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsDocker.cs index 4f573d1..3fd80d0 100644 --- a/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsDocker.cs +++ b/tests/Elzik.Breef.Api.Tests.Functional/BreefTestsDocker.cs @@ -1,6 +1,5 @@ using DotNet.Testcontainers.Builders; using DotNet.Testcontainers.Containers; -using Microsoft.AspNetCore.Mvc; using System.Diagnostics; using Xunit.Abstractions; @@ -9,6 +8,7 @@ namespace Elzik.Breef.Api.Tests.Functional; public class BreefTestsDocker : BreefTestsBase, IAsyncLifetime { private const string DockerImageName = 
"ghcr.io/elzik/elzik-breef-api:latest"; + private const int ContainerStartTimeoutSeconds = 30; private readonly IContainer? _testContainer; private readonly ITestOutputHelper _testOutputHelper; private readonly bool _dockerIsUnavailable; @@ -80,7 +80,7 @@ public BreefTestsDocker(ITestOutputHelper testOutputHelper) .WithEnvironment("breef_Wallabag__Password", breefWallabagPassword) .WithEnvironment("breef_Wallabag__ClientId", breefWallabagClientId) .WithEnvironment("breef_Wallabag__ClientSecret", breefWallabagClientSecret) - .WithWaitStrategy(Wait.ForUnixContainer().UntilPortIsAvailable(8080)) + .WithWaitStrategy(Wait.ForUnixContainer().UntilInternalTcpPortIsAvailable(8080)) .WithOutputConsumer(outputConsumer) .Build(); } @@ -157,9 +157,25 @@ public async Task InitializeAsync() { if (!_dockerIsUnavailable) { - await _testContainer!.StartAsync(); // Null forgiven since if we're not skipping tests, - // _testContainer will never be null - HostPort = _testContainer.GetMappedPublicPort(8080); + using var timeoutCts = new CancellationTokenSource(TimeSpan.FromSeconds(ContainerStartTimeoutSeconds)); + + if(_testContainer == null) + { + throw new InvalidOperationException("Test container is not initialized " + + "and cannot be started."); + } + + try + { + await _testContainer.StartAsync(timeoutCts.Token); + HostPort = _testContainer.GetMappedPublicPort(8080); + } + catch (OperationCanceledException) when (timeoutCts.Token.IsCancellationRequested) + { + throw new TimeoutException($"Container failed to start within {ContainerStartTimeoutSeconds} seconds. 
" + + $"This may indicate that the container is taking too long to become ready " + + $"or there's an issue with the container startup."); + } } } @@ -167,8 +183,13 @@ public async Task DisposeAsync() { if (!_dockerIsUnavailable) { - await _testContainer!.StopAsync(); // Null forgiven since if we're not skipping tests, - // _testContainer will never be null + if (_testContainer == null) + { + throw new InvalidOperationException("Test container is not initialized " + + "and cannot be stopped."); + } + + await _testContainer.StopAsync(); } } } diff --git a/tests/Elzik.Breef.Api.Tests.Functional/Elzik.Breef.Api.Tests.Functional.csproj b/tests/Elzik.Breef.Api.Tests.Functional/Elzik.Breef.Api.Tests.Functional.csproj index 60bb3b3..545222b 100644 --- a/tests/Elzik.Breef.Api.Tests.Functional/Elzik.Breef.Api.Tests.Functional.csproj +++ b/tests/Elzik.Breef.Api.Tests.Functional/Elzik.Breef.Api.Tests.Functional.csproj @@ -1,4 +1,4 @@ - + net8.0 diff --git a/tests/Elzik.Breef.Api.Tests.Integration/Elzik.Breef.Api.Tests.Integration.csproj b/tests/Elzik.Breef.Api.Tests.Integration/Elzik.Breef.Api.Tests.Integration.csproj index 94714b3..4d3f8e8 100644 --- a/tests/Elzik.Breef.Api.Tests.Integration/Elzik.Breef.Api.Tests.Integration.csproj +++ b/tests/Elzik.Breef.Api.Tests.Integration/Elzik.Breef.Api.Tests.Integration.csproj @@ -1,4 +1,4 @@ - + net8.0 diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractorTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/HtmlContentExtractorTests.cs similarity index 54% rename from tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractorTests.cs rename to tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/HtmlContentExtractorTests.cs index f55e45f..9c67ed4 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractorTests.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/HtmlContentExtractorTests.cs @@ -1,11 +1,11 @@ 
using Elzik.Breef.Domain; -using Elzik.Breef.Infrastructure; +using Elzik.Breef.Infrastructure.ContentExtractors; using NSubstitute; using Shouldly; -namespace Elzik.Breef.Infrastructure.Tests.Integration +namespace Elzik.Breef.Infrastructure.Tests.Integration.ContentExtractors { - public class ContentExtractorTests + public class HtmlContentExtractorTests { [Theory] [InlineData("TestHtmlPage.html", "TestHtmlPage-ExpectedContent.txt", "Test HTML Page", "https://test-large-image.jpg")] @@ -18,12 +18,18 @@ public async Task Extract_WithValidUrl_ExtractsContent(string testFileName, stri { // Arrange var mockTestUrl = "https://mock.url"; - var mockHttpClient = Substitute.For(); + var mockHttpClientFactory = Substitute.For(); + var mockHttpClient = Substitute.For(); + mockHttpClientFactory.CreateClient("BreefDownloader").Returns(mockHttpClient); + var testHtml = await File.ReadAllTextAsync(Path.Join("../../../../TestData", testFileName)); - mockHttpClient.DownloadAsync(Arg.Is(mockTestUrl)).Returns(Task.FromResult(testHtml)); + + var mockHandler = new MockHttpMessageHandler(testHtml); + var httpClient = new HttpClient(mockHandler); + mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); // Act - var extractor = new ContentExtractor(mockHttpClient); + var extractor = new HtmlContentExtractor(mockHttpClientFactory); var result = await extractor.ExtractAsync(mockTestUrl); // Assert @@ -37,9 +43,35 @@ public async Task Extract_WithValidUrl_ExtractsContent(string testFileName, stri result.PreviewImageUrl.ShouldBe(expectedPreviewImageUrl); } + [Fact] + public void CanHandle_AnyString_CanHandle() + { + // Arrange + var mockHttpClientFactory = Substitute.For(); + + // Act + var defaultOnlyContentExtractorStrategy = new HtmlContentExtractor(mockHttpClientFactory); + var canHandleAnyString = defaultOnlyContentExtractorStrategy.CanHandle("Any string."); + + // Assert + canHandleAnyString.ShouldBeTrue(); + } + private static string NormaliseLineEndings(string 
text) { return text.Replace("\r\n", "\n"); } + + private class MockHttpMessageHandler(string content) : HttpMessageHandler + { + protected override Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + return Task.FromResult(new HttpResponseMessage + { + StatusCode = System.Net.HttpStatusCode.OK, + Content = new StringContent(content) + }); + } + } } } \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RawRedditPostClientTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RawRedditPostClientTests.cs new file mode 100644 index 0000000..bc5022a --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RawRedditPostClientTests.cs @@ -0,0 +1,149 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; +using Refit; +using Shouldly; +using System.Globalization; + +namespace Elzik.Breef.Infrastructure.Tests.Integration.ContentExtractors.Reddit.Client; + +public class RawRedditPostClientTests +{ + private static bool IsRunningInGitHubWorkflow => Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; + + [SkippableFact] + public async Task GetPost_ValidPostId_ReturnsRedditPost() + { + // Arrange + Skip.If(IsRunningInGitHubWorkflow, "Skipped because requests to reddit.com from GitHub workflows are " + + "always blocked meaning this test case always fails. 
This must be run locally instead."); + var client = RestService.For("https://www.reddit.com/"); + var postId = "1kqiwzc"; // https://www.reddit.com/r/learnprogramming/comments/1kqiwzc + + // Act + var redditPost = await client.GetPost(postId); + + // Assert + redditPost.ShouldNotBeNull(); + redditPost.Count.ShouldBe(2, "a reddit post is made up of two listings: one for the main post and one for the replies"); + redditPost[0].Data.ShouldNotBeNull(); + redditPost[0].Data.Children.ShouldNotBeNull(); + redditPost[0].Data.Children.Count.ShouldBe(1, "there is only a single main post"); + redditPost[0].Data.Children[0].Kind.ShouldBe("t3", "t3 represents the type of main post"); + redditPost[0].Data.Children[0].Data.ShouldNotBeNull(); + + var mainPost = redditPost[0].Data.Children[0].Data; + mainPost.Id.ShouldBe("1kqiwzc"); + mainPost.Author.ShouldBe("melvman1"); + mainPost.CreatedUtc.ShouldBe(DateTime.Parse("2025-05-19T18:18:05", CultureInfo.InvariantCulture)); + mainPost.SelfText.ShouldBe("I am just about to enter the programming world, and want to become a software " + + "engineer. This work ready college in Sweden has a 2 year long .net developer program with internships " + + "at real companies. 
They also have a similar program but with javascript.\n\nI am wondering if this " + + "would be a good path if my dream is to become a freelancer and I want to build easy apps / websites for " + + "small startups in Sweden/worldwide.\n\nThis is the program:\n\nProgramming C# – 12 weeks\n\nDevelopment " + + "against database and database administration – 9 weeks\n\nWeb development with .NET – 12 weeks\n\nAgile " + + "development – 6 weeks\n\nCustomer understanding, consulting and reporting – 3 weeks\n\nApprenticeship " + + "at companies – 12 weeks\n\nClean code – 6 weeks\n\nApprenticeship at companies – 16 weeks\n\nExam " + + "thesis – 4 weeks"); + mainPost.Content.ShouldBe(mainPost.SelfText); + + var replies = redditPost[1].Data.Children; + + replies.Count.ShouldBe(5); + + var firstReply = replies.Single(r => r.Data.Id == "mt7aaf6"); + firstReply.Kind.ShouldBe("t1"); + firstReply.Data.Author.ShouldBeOneOf("CodeRadDesign", "[deleted]"); + firstReply.Data.Body.ShouldBeOneOf( + "not really.\n\nas someone who's been freelance on and off for 30 years, you're looking for a more " + + "rounded skill set. \n\nyou're not going to compete with 'people from third world countries' like the " + + "other poster mentioned; you just can't. so you have to ask yourself, what do people in my area actually " + + "need. \n\nif the answer is (and it probably is) websites for their local businesses, then you want a mix " + + "of graphic art, html/css/js, a frontend tech like react or vue, and a backend tech. that could be C#.net" + + ", that could by python, lots of options.\n\nC# is definitely in demand, but not so much in freelance. " + + "for the most part a C#.net core specialist is going to be part of a team, at a company, and you'll defo " + + "want that college paper for that. if you're only planning on freelance, you can realistically just self " + + "learn. if you don't think you can handle the unstructuredness of self-learning..... you're going to hate " + + "freelancing. 
\n\notherwise looks like a fine program, i would likely favor taking something like that " + + "and planning on getting a Real Job though haha.\n\n*regarding your last point on your other comment \"" + + "c# looks easy to learn\" is not really a valid criteria. your first language is going to be the hardest" + + ", your second language will be ten times easier. c# is a good foundational language tho, i'd recommend " + + "it over python because it teaches a lot of good habits early.", + "[deleted]" + ); + + var secondReply = replies.Single(r => r.Data.Id == "mt7lqgx"); + secondReply.Kind.ShouldBe("t1"); + secondReply.Data.Author.ShouldBeOneOf("No_Researcher_7875", "[deleted]"); + secondReply.Data.Body.ShouldBeOneOf( + "As mentioned before it will be hard to compete with the experts but i think you are not thinking this " + + "correctly.\n\n If you want to build sites, is not that important in wich language you code them but how " + + "good and fast can you build them. \n\nThis program is a good start, and if you choose the js one would " + + "be a little better mostly for the front end part.\n\nAnyways chose whatever program you like the most " + + "and code, code a lot and you will be able to do what you want.", + "[deleted]" + ); + + var thirdReply = replies.Single(r => r.Data.Id == "mt606l6"); + thirdReply.Kind.ShouldBe("t1"); + thirdReply.Data.Author.ShouldBeOneOf("[deleted]"); + thirdReply.Data.Body.ShouldBeOneOf("[deleted]"); + + var fourthReply = replies.Single(r => r.Data.Id == "mt83c0a"); + fourthReply.Kind.ShouldBe("t1"); + fourthReply.Data.Author.ShouldBeOneOf("goqsane", "[deleted]"); + fourthReply.Data.Body.ShouldBeOneOf("No it’s not.", "[deleted]"); + + var fifthReply = replies.Single(r => r.Data.Id == "mt9gc9x"); + fifthReply.Kind.ShouldBe("t1"); + fifthReply.Data.Author.ShouldBeOneOf("ToThePillory", "[deleted]"); + fifthReply.Data.Body.ShouldBeOneOf( + "I got most of my freelancing work in C#, that and Java.\n\nThe problem is that you're a beginner, 
and " + + "freelancing doesn't really suit beginners, or even decent juniors.\n\nFreelancing means every single " + + "problem you encounter is 100% your responsibility to fix. There is no team to bounce ideas off, there " + + "is no manager to talk a client out of an idea, there is nobody other than you to solve \\*all\\* " + + "problems.\n\nI would aim to get a regular programming job first, freelancing is not easy, and generally " + + "pays less than a normal job.", + "[deleted]" + ); + + // Note: Replies is now object? to handle raw Reddit API response variations + // Testing nested replies structure is now handled in the transformer layer + thirdReply.Data.Replies.ShouldNotBeNull("just verify replies exist in some form"); + } + + [SkippableFact] + public async Task GetPost_PostWithGalleryDataNumericIds_HandlesDeserialization() + { + // Arrange + Skip.If(IsRunningInGitHubWorkflow, "Skipped because requests to reddit.com from GitHub workflows are " + + "always blocked meaning this test case always fails. 
This must be run locally instead."); + var client = RestService.For("https://www.reddit.com/"); + var postId = "1nzkay2"; // https://www.reddit.com/r/aircrashinvestigation/comments/1nzkay2/why_i_think_those_two_crashes_will_never_be_in/ + + // Act + var redditPost = await client.GetPost(postId); + + // Assert + redditPost.ShouldNotBeNull(); + redditPost.Count.ShouldBe(2, "a reddit post is made up of two listings: one for the main post and one for the replies"); + redditPost[0].Data.ShouldNotBeNull(); + redditPost[0].Data.Children.ShouldNotBeNull(); + redditPost[0].Data.Children.Count.ShouldBe(1, "there is only a single main post"); + redditPost[0].Data.Children[0].Kind.ShouldBe("t3", "t3 represents the type of main post"); + redditPost[0].Data.Children[0].Data.ShouldNotBeNull(); + + var mainPost = redditPost[0].Data.Children[0].Data; + mainPost.Id.ShouldBe("1nzkay2"); + mainPost.Title.ShouldNotBeNullOrEmpty(); + mainPost.Author.ShouldNotBeNullOrEmpty(); + mainPost.Subreddit.ShouldBe("aircrashinvestigation"); + + if (mainPost.IsGallery && mainPost.GalleryData?.Items != null) + { + foreach (var item in mainPost.GalleryData.Items) + { + item.Id.ShouldNotBeNull("Gallery item ID should be converted from number to string"); + item.MediaId.ShouldNotBeNull("Media ID should be present"); + } + } + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RawSubredditClientTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RawSubredditClientTests.cs new file mode 100644 index 0000000..a9e6380 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RawSubredditClientTests.cs @@ -0,0 +1,59 @@ +using Refit; +using Shouldly; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +namespace Elzik.Breef.Infrastructure.Tests.Integration.ContentExtractors.Reddit.Client; + +public class 
RawSubredditClientTests +{ + private static bool IsRunningInGitHubWorkflow => Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; + + [SkippableFact] + public async Task GetNewInSubreddit_ValidSubreddit_ReturnsNewInSubreddit() + { + // Arrange + Skip.If(IsRunningInGitHubWorkflow, "Skipped because requests to reddit.com from GitHub workflows are " + + "always blocked meaning this test case always fails. This must be run locally instead."); + var client = RestService.For("https://www.reddit.com/"); + + // Act + var newInSubreddit = await client.GetNewInSubreddit("reddit"); + + // Assert + newInSubreddit.ShouldNotBeNull(); + newInSubreddit.Data.ShouldNotBeNull(); + newInSubreddit.Data.Children.ShouldNotBeNull(); + newInSubreddit.Data.Children.Count.ShouldNotBe(0, "because at least one post will be returned"); + foreach (var child in newInSubreddit.Data.Children) + { + child.Data.ShouldNotBeNull(); + child.Data.Title.ShouldNotBeNullOrEmpty(); + child.Data.Author.ShouldNotBeNullOrEmpty(); + child.Data.SelfText.ShouldNotBeNull(); + child.Data.Url.ShouldNotBeNullOrEmpty(); + child.Data.Id.ShouldNotBeNullOrEmpty(); + } + } + + [SkippableFact] + public async Task GetAboutSubreddit_ValidSubreddit_ReturnsAboutSubreddit() + { + // Arrange + Skip.If(IsRunningInGitHubWorkflow, "Skipped because requests to reddit.com from GitHub workflows are " + + "always blocked meaning this test case always fails. 
This must be run locally instead."); + var client = RestService.For("https://www.reddit.com/"); + + // Act + var aboutSubreddit = await client.GetAboutSubreddit("reddit"); + + // Assert + aboutSubreddit.ShouldNotBeNull(); + aboutSubreddit.Data.ShouldNotBeNull(); + aboutSubreddit.Data.PublicDescription.ShouldNotBeNull(); + aboutSubreddit.Data.IconImg.ShouldNotBeNull(); + aboutSubreddit.Data.BannerImg.ShouldNotBeNull(); + aboutSubreddit.Data.BannerBackgroundImage.ShouldNotBeNull(); + aboutSubreddit.Data.MobileBannerImage.ShouldNotBeNull(); + aboutSubreddit.Data.CommunityIcon.ShouldNotBeNull(); + } +} diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RedditPostClientTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RedditPostClientTests.cs new file mode 100644 index 0000000..3b2d69b --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/Client/RedditPostClientTests.cs @@ -0,0 +1,73 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; +using Refit; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Integration.ContentExtractors.Reddit.Client; + +public class RedditPostClientTests +{ + private static bool IsRunningInGitHubWorkflow => Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; + + [SkippableFact] + public async Task GetPost_ValidPostId_ReturnsExpectedRedditPost() + { + // Arrange + Skip.If(IsRunningInGitHubWorkflow, "Skipped because requests to reddit.com from GitHub workflows are " + + "always blocked meaning this test case always fails. 
This must be run locally instead."); + + var rawRedditClient = RestService.For("https://www.reddit.com/"); + var transformer = new RawRedditPostTransformer(); + var redditClient = new RedditPostClient(rawRedditClient, transformer); + var postId = "1kqiwzc"; // https://www.reddit.com/r/learnprogramming/comments/1kqiwzc + + // Act + var redditPost = await redditClient.GetPost(postId); + + // Assert + redditPost.ShouldNotBeNull(); + redditPost.Post.ShouldNotBeNull(); + redditPost.Post.Id.ShouldBe("1kqiwzc"); + redditPost.Post.Author.ShouldBeOneOf("melvman1", "[deleted]"); + redditPost.Post.Title.ShouldNotBeNullOrWhiteSpace(); + redditPost.Post.Content.ShouldNotBeNullOrWhiteSpace(); + redditPost.Post.ImageUrl.ShouldBeNull(); + redditPost.Comments.ShouldNotBeNull(); + redditPost.Comments.Count.ShouldBe(5); + var firstComment = redditPost.Comments.Single(c => c.Id == "mt7aaf6"); + firstComment.Author.ShouldBeOneOf("CodeRadDesign", "[deleted]"); + firstComment.Content.ShouldBeOneOf( + "not really.\n\nas someone who's been freelance on and off for 30 years, you're looking for a more " + + "rounded skill set. \n\nyou're not going to compete with 'people from third world countries' like the " + + "other poster mentioned; you just can't. so you have to ask yourself, what do people in my area actually " + + "need. \n\nif the answer is (and it probably is) websites for their local businesses, then you want a mix " + + "of graphic art, html/css/js, a frontend tech like react or vue, and a backend tech. that could be C#.net" + + ", that could by python, lots of options.\n\nC# is definitely in demand, but not so much in freelance. " + + "for the most part a C#.net core specialist is going to be part of a team, at a company, and you'll defo " + + "want that college paper for that. if you're only planning on freelance, you can realistically just self " + + "learn. if you don't think you can handle the unstructuredness of self-learning..... you're going to hate " + + "freelancing. 
\n\notherwise looks like a fine program, i would likely favor taking something like that " + + "and planning on getting a Real Job though haha.\n\n*regarding your last point on your other comment \"" + + "c# looks easy to learn\" is not really a valid criteria. your first language is going to be the hardest" + + ", your second language will be ten times easier. c# is a good foundational language tho, i'd recommend " + + "it over python because it teaches a lot of good habits early.", + "[deleted]" + ); + + var secondComment = redditPost.Comments.Single(c => c.Id == "mt7lqgx"); + secondComment.Author.ShouldBeOneOf("No_Researcher_7875", "[deleted]"); + + var thirdComment = redditPost.Comments.Single(c => c.Id == "mt606l6"); + thirdComment.Author.ShouldBeOneOf("[deleted]"); + thirdComment.Replies.ShouldNotBeNull(); + thirdComment.Replies.Count.ShouldBe(1); + var nestedReply = thirdComment.Replies.Single(r => r.Id == "mt60jnv"); + nestedReply.Author.ShouldBeOneOf("melvman1", "[deleted]"); + + var fourthComment = redditPost.Comments.Single(c => c.Id == "mt83c0a"); + fourthComment.Author.ShouldBeOneOf("goqsane", "[deleted]"); + + var fifthComment = redditPost.Comments.Single(c => c.Id == "mt9gc9x"); + fifthComment.Author.ShouldBeOneOf("ToThePillory", "[deleted]"); + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/RedditPostContentExtractorTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/RedditPostContentExtractorTests.cs new file mode 100644 index 0000000..ddf6f70 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/ContentExtractors/Reddit/RedditPostContentExtractorTests.cs @@ -0,0 +1,223 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; +using Microsoft.Extensions.Options; +using NSubstitute; 
+using Refit; +using Shouldly; +using System.Text.Json; + +namespace Elzik.Breef.Infrastructure.Tests.Integration.ContentExtractors.Reddit; + +public sealed class RedditPostContentExtractorTests : IDisposable +{ + private static bool IsRunningInGitHubWorkflow => Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; + + private readonly RedditPostContentExtractor _extractor; + private readonly HttpClient _httpClient; + + public RedditPostContentExtractorTests() + { + var rawRedditClient = RestService.For("https://www.reddit.com/"); + var transformer = new RawRedditPostTransformer(); + var redditPostClient = new RedditPostClient(rawRedditClient, transformer); + + var rawSubredditClient = RestService.For("https://www.reddit.com/"); + var rawNewInSubredditTransformer = new RawNewInSubredditTransformer(redditPostClient); + var subredditClient = new SubredditClient(rawSubredditClient, rawNewInSubredditTransformer); + + var redditOptions = Options.Create(new RedditOptions()); + var httpClientOptions = Options.Create(new HttpClientOptions { UserAgent = "breef-integration-tests" }); + + var mockHttpClientFactory = Substitute.For(); + _httpClient = new HttpClient(); + _httpClient.DefaultRequestHeaders.Add("User-Agent", httpClientOptions.Value.UserAgent); + _httpClient.Timeout = TimeSpan.FromSeconds(httpClientOptions.Value.TimeoutSeconds); + mockHttpClientFactory.CreateClient("BreefDownloader").Returns(_httpClient); + + var subredditImageExtractor = new SubredditContentExtractor(subredditClient, mockHttpClientFactory, redditOptions); + + _extractor = new RedditPostContentExtractor(redditPostClient, subredditImageExtractor, redditOptions); + } + + [SkippableTheory] + [InlineData("https://www.reddit.com/r/learnprogramming/comments/1kqiwzc")] + [InlineData("https://reddit.com/r/learnprogramming/comments/1kqiwzc/")] + [InlineData("https://www.reddit.com/r/learnprogramming/comments/1kqiwzc/title")] + public async Task 
ExtractAsync_RealRedditPost_ReturnsValidExtract(string url) + { + // Arrange + SkipIfInGitHubWorkflow(); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.ShouldNotBeNull(); + result.Title.ShouldNotBeNullOrWhiteSpace(); + result.Content.ShouldNotBeNullOrWhiteSpace(); + result.PreviewImageUrl.ShouldNotBeNullOrWhiteSpace(); + + var redditPost = JsonSerializer.Deserialize(result.Content); + redditPost.ShouldNotBeNull(); + redditPost.Post.ShouldNotBeNull(); + redditPost.Post.Id.ShouldBe("1kqiwzc"); + redditPost.Post.Title.ShouldNotBeNullOrWhiteSpace(); + redditPost.Comments.ShouldNotBeNull(); + } + + [SkippableFact] + public async Task ExtractAsync_PostWithImage_UsesPostImage() + { + // Arrange + SkipIfInGitHubWorkflow(); + + var urlWithKnownGoodImage = "https://www.reddit.com/r/BBQ/comments/1nxust6/have_anyone_use_coconut_shell_as_smoke"; + + // Act + var result = await _extractor.ExtractAsync(urlWithKnownGoodImage); + + // Assert + result.ShouldNotBeNull(); + result.PreviewImageUrl.ShouldNotBeNull(); + result.PreviewImageUrl.ShouldBe("https://preview.redd.it/olmpl5vmp3tf1.jpeg?auto=webp&s=1cb106a6fab1ddd48bcf8e9afdd2a06ca22d46ba"); + } + + [SkippableFact] + public async Task ExtractAsync_PostWithoutImage_UsesSubredditFallback() + { + // Arrange + SkipIfInGitHubWorkflow(); + + var urlWithNoImage = "https://www.reddit.com/r/bristol/comments/1nzoyrd/parking_near_cotham_school"; + + // Act + var result = await _extractor.ExtractAsync(urlWithNoImage); + + // Assert + result.ShouldNotBeNull(); + result.PreviewImageUrl.ShouldNotBeNull(); + result.PreviewImageUrl.ShouldBe("https://b.thumbs.redditmedia.com/fMCtUDLMEEt1SrDtRyg1v1xiXVoXmP_3dxScj1kgzoE.png"); + } + + [SkippableFact] + public async Task ExtractAsync_PostAndSubredditWithoutImage_UsesRedditFallback() + { + // Arrange + SkipIfInGitHubWorkflow(); + + var urlWithNoImage = "https://www.reddit.com/r/PleX/comments/1nsxi8p/the_recent_data_breach_looks_to_have_been_made"; + + // Act + var 
result = await _extractor.ExtractAsync(urlWithNoImage); + + // Assert + result.ShouldNotBeNull(); + result.PreviewImageUrl.ShouldNotBeNull(); + result.PreviewImageUrl.ShouldBe("https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Lockup_Logo.svg"); + } + + [SkippableFact] + public async Task ExtractAsync_ValidPost_ContentContainsCompleteRedditStructure() + { + // Arrange + SkipIfInGitHubWorkflow(); + + var url = "https://www.reddit.com/r/learnprogramming/comments/1kqiwzc"; + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + var redditPost = JsonSerializer.Deserialize(result.Content); + redditPost.ShouldNotBeNull(); + redditPost.Post.Id.ShouldNotBeNullOrEmpty(); + redditPost.Post.Title.ShouldNotBeNullOrEmpty(); + redditPost.Post.Author.ShouldNotBeNullOrEmpty(); + redditPost.Post.Subreddit.ShouldNotBeNullOrEmpty(); + redditPost.Post.CreatedUtc.ShouldNotBe(default); + redditPost.Comments.ShouldNotBeNull(); + if (redditPost.Comments.Count != 0) + { + var firstComment = redditPost.Comments[0]; + firstComment.Id.ShouldNotBeNullOrEmpty(); + firstComment.CreatedUtc.ShouldNotBe(default); + } + } + + [SkippableTheory] + [InlineData("not-a-url")] + [InlineData("https://reddit.com")] + [InlineData("https://reddit.com/r/programming")] + [InlineData("https://reddit.com/r/programming/posts/abc123/title")] + [InlineData("https://not-reddit.com/r/programming/comments/abc123/title")] + public async Task ExtractAsync_InvalidUrls_ThrowsInvalidOperationException(string invalidUrl) + { + // Arrange + SkipIfInGitHubWorkflow(); + + // Act & Assert + await Should.ThrowAsync(() => _extractor.ExtractAsync(invalidUrl)); + } + + [SkippableFact] + public async Task ExtractAsync_NonExistentPost_ThrowsExpectedException() + { + // Arrange + SkipIfInGitHubWorkflow(); + + var url = "https://www.reddit.com/r/programming/comments/nonexistent123/title"; + + // Act + var ex = await Should.ThrowAsync(() => _extractor.ExtractAsync(url)); + + // Assert + 
ex.Message.ShouldBe("Response status code does not indicate success: 404 (Not Found)."); + } + + [Theory] + [InlineData("https://reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://reddit.com/r/programming/comments/abc123")] + [InlineData("https://www.reddit.com/r/funny/comments/def456/joke")] + [InlineData("https://www.reddit.com/r/funny/comments/def456")] + [InlineData("https://REDDIT.COM/r/AskReddit/comments/ghi789/question")] + [InlineData("https://REDDIT.COM/r/AskReddit/comments/ghi789")] + [InlineData("https://reddit.com/r/pics/comments/jkl012/image/")] + [InlineData("https://reddit.com/r/pics/comments/jkl012/")] + public void CanHandle_VariousValidUrls_ReturnsTrue(string validUrl) + { + // Act + var canHandle = _extractor.CanHandle(validUrl); + + // Assert + canHandle.ShouldBeTrue($"Should handle URL: {validUrl}"); + } + + [Theory] + [InlineData("https://reddit.com/r/programming")] + [InlineData("https://reddit.com/r/programming/hot")] + [InlineData("https://reddit.com/r/programming/comments")] + [InlineData("https://reddit.com/r/programming/comments/abc123/title/extra")] + [InlineData("https://reddit.com/user/username/comments/abc123/title")] + [InlineData("https://old.reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://youtube.com/r/programming/comments/abc123/title")] + public void CanHandle_VariousInvalidUrls_ReturnsFalse(string invalidUrl) + { + // Act + var canHandle = _extractor.CanHandle(invalidUrl); + + // Assert + canHandle.ShouldBeFalse($"Should not handle URL: {invalidUrl}"); + } + + private static void SkipIfInGitHubWorkflow(string reason = "Skipped because requests to reddit.com from GitHub workflows " + + "are always blocked meaning this test case always fails. 
This must be run locally instead.") + { + Skip.If(IsRunningInGitHubWorkflow, reason); + } + + public void Dispose() + { + _httpClient?.Dispose(); + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/Elzik.Breef.Infrastructure.Tests.Integration.csproj b/tests/Elzik.Breef.Infrastructure.Tests.Integration/Elzik.Breef.Infrastructure.Tests.Integration.csproj index f10e5a8..625167c 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/Elzik.Breef.Infrastructure.Tests.Integration.csproj +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/Elzik.Breef.Infrastructure.Tests.Integration.csproj @@ -1,4 +1,4 @@ - + net8.0 @@ -28,7 +28,15 @@ + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + all diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/TestOutputLoggerProvider.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/TestOutputLoggerProvider.cs index d417cfd..a7ff65f 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/TestOutputLoggerProvider.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/TestOutputLoggerProvider.cs @@ -1,20 +1,19 @@ using Microsoft.Extensions.Logging; using Xunit.Abstractions; -namespace Elzik.Breef.Infrastructure.Tests.Integration +namespace Elzik.Breef.Infrastructure.Tests.Integration; + +public sealed class TestOutputLoggerProvider(ITestOutputHelper testOutputHelper) : ILoggerProvider { - public class TestOutputLoggerProvider(ITestOutputHelper testOutputHelper) : ILoggerProvider - { - private readonly ITestOutputHelper _testOutputHelper = testOutputHelper; + private readonly ITestOutputHelper _testOutputHelper = testOutputHelper; - public ILogger CreateLogger(string categoryName) - { - return new TestOutputLogger(_testOutputHelper, categoryName); - } + public ILogger CreateLogger(string categoryName) + { + return new TestOutputLogger(_testOutputHelper, 
categoryName); + } - public void Dispose() - { - GC.SuppressFinalize(this); - } + public void Dispose() + { + // Nothing to dispose } } diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagClientTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagClientTests.cs index 401bce6..ac79d19 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagClientTests.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagClientTests.cs @@ -1,9 +1,7 @@ using Elzik.Breef.Infrastructure.Wallabag; using Microsoft.Extensions.Configuration; -using Newtonsoft.Json; using Refit; using Shouldly; -using System.Diagnostics; using Xunit.Abstractions; namespace Elzik.Breef.Infrastructure.Tests.Integration.Wallabag diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagOptionsTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagOptionsTests.cs index f1c3e77..2afa87b 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagOptionsTests.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Integration/Wallabag/WallabagOptionsTests.cs @@ -2,7 +2,6 @@ using Microsoft.Extensions.Options; using Shouldly; using Elzik.Breef.Infrastructure.Wallabag; -using Newtonsoft.Json.Linq; namespace Elzik.Breef.Infrastructure.Tests.Integration.Wallabag; diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/WebPageDownLoaderOptionsTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/WebPageDownLoaderOptionsTests.cs deleted file mode 100644 index 77b23bb..0000000 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/WebPageDownLoaderOptionsTests.cs +++ /dev/null @@ -1,44 +0,0 @@ -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Options; -using Shouldly; - -namespace Elzik.Breef.Infrastructure.Tests.Integration; - -public class WebPageDownLoaderOptionsTests -{ - [Fact] - public void 
WhenValidated_MissingUserAgent_ShouldFailValidation() - { - // Arrange - var services = new ServiceCollection(); - services.AddOptions() - .Configure(o => o.UserAgent = string.Empty) - .ValidateDataAnnotations(); - var provider = services.BuildServiceProvider(); - var options = provider.GetRequiredService>(); - - // Act - var ex = Assert.Throws(() => options.Value); - - // Assert - ex.Message.ShouldBe("DataAnnotation validation failed for 'WebPageDownLoaderOptions' members: " + - "'UserAgent' with the error: 'The UserAgent field is required.'."); - } - [Fact] - public void WhenValidated_WithValidUserAgent_ShouldPassValidation() - { - // Arrange - var services = new ServiceCollection(); - services.AddOptions() - .Configure(o => o.UserAgent = "TestAgent/1.0") - .ValidateDataAnnotations(); - var provider = services.BuildServiceProvider(); - var options = provider.GetRequiredService>(); - - // Act - var value = options.Value; - - // Assert - value.UserAgent.ShouldBe("TestAgent/1.0"); - } -} diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Integration/WebPageDownloaderTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Integration/WebPageDownloaderTests.cs deleted file mode 100644 index 9098208..0000000 --- a/tests/Elzik.Breef.Infrastructure.Tests.Integration/WebPageDownloaderTests.cs +++ /dev/null @@ -1,76 +0,0 @@ -using Microsoft.Extensions.Options; -using Shouldly; -using Xunit.Abstractions; - -namespace Elzik.Breef.Infrastructure.Tests.Integration -{ - public class WebPageDownloaderTests(ITestOutputHelper testOutputHelper) - { - - private readonly IOptions _defaultOptions = Options.Create(new WebPageDownLoaderOptions()); - private readonly TestOutputFakeLogger _testOutputFakeLogger = new(testOutputHelper); - private static bool IsRunningInGitHubWorkflow => Environment.GetEnvironmentVariable("GITHUB_ACTIONS") == "true"; - - [Fact] - public async Task DownloadAsync_WithUrlFromStaticPage_ReturnsString() - { - // Arrange - var testUrl = 
"https://elzik.github.io/test-web/test.html"; - - // Act - var httpClient = new WebPageDownloader(_testOutputFakeLogger, _defaultOptions); - var result = await httpClient.DownloadAsync(testUrl); - - // Assert - var expectedSource = await File.ReadAllTextAsync("../../../../TestData/StaticTestPage.html"); - - var lineEndingNormalisedExpected = NormaliseLineEndings(expectedSource); - var lineEndingNormalisedResult = NormaliseLineEndings(result); - - lineEndingNormalisedResult.ShouldBe(lineEndingNormalisedExpected); - } - - [Fact] - public async Task DownloadAsync_WithUrlFromStaticPage_LogsUserAgent() - { - // Arrange - var testUrl = "https://elzik.github.io/test-web/test.html"; - - // Act - var httpClient = new WebPageDownloader(_testOutputFakeLogger, _defaultOptions); - await httpClient.DownloadAsync(testUrl); - - // Assert - var logCollector = _testOutputFakeLogger.FakeLogger.Collector; - logCollector.Count.ShouldBe(1); - _testOutputFakeLogger.FakeLogger.Collector.LatestRecord.Level.ShouldBe( - Microsoft.Extensions.Logging.LogLevel.Information); - _testOutputFakeLogger.FakeLogger.Collector.LatestRecord.Message.ShouldBe( - "Downloads will be made using the User-Agent: Mozilla/5.0, (Windows NT 10.0; Win64; x64), AppleWebKit/537.36, (KHTML, like Gecko), Chrome/110.0.0.0, Safari/537.36"); - - } - - [SkippableTheory] - [InlineData("https://reddit.com")] - [InlineData("https://stackoverflow.com/")] - public async Task DownloadAsync_ForBlockedSites_ThwartsBlock(string testUrl) - { - // Arrange - Skip.If(IsRunningInGitHubWorkflow && testUrl == "https://reddit.com", - "Skipped because requests to reddit.com from GitHub workflows are always " + - "blocked meaning this test case always fails. 
This must be run locally instead."); - - // Act - var httpClient = new WebPageDownloader(_testOutputFakeLogger, _defaultOptions); - var result = await httpClient.DownloadAsync(testUrl); - - // Assert - result.ShouldNotBeNull(); - } - - private static string NormaliseLineEndings(string text) - { - return text.Replace("\r\n", "\n"); - } - } -} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/ContentExtractorStrategyTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/ContentExtractorStrategyTests.cs new file mode 100644 index 0000000..1a9c15c --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/ContentExtractorStrategyTests.cs @@ -0,0 +1,181 @@ +using Elzik.Breef.Domain; +using Elzik.Breef.Infrastructure.ContentExtractors; +using Microsoft.Extensions.Logging.Testing; +using NSubstitute; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors; + +public class ContentExtractorStrategyTests +{ + private readonly Extract _extractedByExtractor1 = new("Title1", "Content1", "Image1"); + private readonly Extract _extractedByExtractor2 = new("Title2", "Content2", "Image2"); + private readonly Extract _extractedByDefaultExtractor = new("DefaultTitle", "DefaultContent", "DefaultImage"); + + private readonly IContentExtractor _extractor1 = Substitute.For(); + private readonly IContentExtractor _extractor2 = Substitute.For(); + private readonly IContentExtractor _defaultExtractor = Substitute.For(); + + private readonly ContentExtractorStrategy _contentExtractorStrategy; + + private readonly FakeLogger _fakeLogger; + + + public ContentExtractorStrategyTests() + { + _extractor1.ExtractAsync(Arg.Any()) + .Returns(ci => { return Task.FromResult(_extractedByExtractor1); }); + _extractor2.ExtractAsync(Arg.Any()) + .Returns(ci => { return Task.FromResult(_extractedByExtractor2); }); + _defaultExtractor.ExtractAsync(Arg.Any()) + .Returns(ci => { return 
Task.FromResult(_extractedByDefaultExtractor); }); + _defaultExtractor.CanHandle(Arg.Any()).Returns(true); + + _fakeLogger = new FakeLogger(); + + _contentExtractorStrategy = new ContentExtractorStrategy(_fakeLogger, [_extractor1, _extractor2], _defaultExtractor); + } + + [Fact] + public async Task ExtractAsync_Extractor1CanHandle_UsesExtractor1() + { + // Arrange + _extractor1.CanHandle(Arg.Any()).Returns(true); + _extractor2.CanHandle(Arg.Any()).Returns(false); + _extractor2.ExtractAsync(Arg.Any()).Returns>(_ => + throw new InvalidOperationException("This extractor (2) should not be used.")); + + + // Act + var extract = await _contentExtractorStrategy.ExtractAsync("http://test"); + + // Assert + extract.ShouldBe(_extractedByExtractor1); + _fakeLogger.Collector.Count.ShouldBe(1); + _fakeLogger.Collector.LatestRecord.Level.ShouldBe(Microsoft.Extensions.Logging.LogLevel.Information); + _fakeLogger.Collector.LatestRecord.Message.ShouldStartWith( + $"Extraction will be provided for by {_extractor1.GetType().Name}"); + } + + [Fact] + public async Task ExtractAsync_Extractor2CanHandle_UsesExtractor2() + { + // Arrange + _extractor1.CanHandle(Arg.Any()).Returns(false); + _extractor1.ExtractAsync(Arg.Any()).Returns>(_ => + throw new InvalidOperationException("This extractor (1) should not be used.")); + _extractor2.CanHandle(Arg.Any()).Returns(true); + + // Act + var extract = await _contentExtractorStrategy.ExtractAsync("http://test"); + + // Assert + extract.ShouldBe(_extractedByExtractor2); + _fakeLogger.Collector.Count.ShouldBe(1); + _fakeLogger.Collector.LatestRecord.Level.ShouldBe(Microsoft.Extensions.Logging.LogLevel.Information); + _fakeLogger.Collector.LatestRecord.Message.ShouldStartWith( + $"Extraction will be provided for by {_extractor2.GetType().Name}"); + } + + [Fact] + public async Task ExtractAsync_NoSpecificExtractorCanHandle_UsesDefaultExtractor() + { + // Arrange + _extractor1.CanHandle(Arg.Any()).Returns(false); + 
_extractor1.ExtractAsync(Arg.Any()).Returns>(_ => + throw new InvalidOperationException("This extractor (1) should not be used.")); + _extractor2.CanHandle(Arg.Any()).Returns(false); + _extractor2.ExtractAsync(Arg.Any()).Returns>(_ => + throw new InvalidOperationException("This extractor (2) should not be used.")); + + // Act + var extract = await _contentExtractorStrategy.ExtractAsync("http://test"); + + // Assert + extract.ShouldBe(_extractedByDefaultExtractor); + _fakeLogger.Collector.Count.ShouldBe(1); + _fakeLogger.Collector.LatestRecord.Level.ShouldBe(Microsoft.Extensions.Logging.LogLevel.Information); + _fakeLogger.Collector.LatestRecord.Message.ShouldStartWith( + $"Extraction will be provided for by {_defaultExtractor.GetType().Name}"); + } + + [Fact] + public async Task ExtractAsync_OnlyDefaultExtractorExists_UsesDefaultExtractor() + { + // Arrange + _extractor1.CanHandle(Arg.Any()).Returns(true); + _extractor1.ExtractAsync(Arg.Any()).Returns>(_ => + throw new InvalidOperationException("This extractor (1) should not be used.")); + _extractor2.CanHandle(Arg.Any()).Returns(true); + _extractor2.ExtractAsync(Arg.Any()).Returns>(_ => + throw new InvalidOperationException("This extractor (2) should not be used.")); + + // Act + var defaultOnlyContentExtractorStrategy = new ContentExtractorStrategy(_fakeLogger, [], _defaultExtractor); + var extract = await defaultOnlyContentExtractorStrategy.ExtractAsync("http://test"); + + // Assert + extract.ShouldBe(_extractedByDefaultExtractor); + _fakeLogger.Collector.Count.ShouldBe(1); + _fakeLogger.Collector.LatestRecord.Level.ShouldBe(Microsoft.Extensions.Logging.LogLevel.Information); + _fakeLogger.Collector.LatestRecord.Message.ShouldStartWith( + $"Extraction will be provided for by {_defaultExtractor.GetType().Name}"); + } + + [Fact] + public void CanHandle_AnyString_CanHandle() + { + // Act + var defaultOnlyContentExtractorStrategy = new ContentExtractorStrategy(_fakeLogger, [], _defaultExtractor); + var 
canHandleAnyString = defaultOnlyContentExtractorStrategy.CanHandle("Any string."); + + // Assert + canHandleAnyString.ShouldBeTrue(); + } + + [Fact] + public void Instantiated_DefaultExtractorInSpecificExtractors_Throws() + { + // Arrange + var extractor = Substitute.For(); + + // Act + var ex = Assert.Throws(() => + new ContentExtractorStrategy(_fakeLogger, [extractor], extractor)); + + // Assert + ex.Message.ShouldBe("Default extractor should not be in the specific extractors list."); + } + + [Fact] + public void Instantiated_NullDefaultExtractor_Throws() + { + // Arrange + var extractor = Substitute.For(); + + // Act +#pragma warning disable CS8625 // Cannot convert null literal to non-nullable reference type. + var ex = Assert.Throws(() => + new ContentExtractorStrategy(_fakeLogger, [extractor], null)); +#pragma warning restore CS8625 // Cannot convert null literal to non-nullable reference type. + + // Act + ex.Message.ShouldBe("Value cannot be null. (Parameter 'defaultExtractor')"); + } + + [Fact] + public void Instantiated_NullSpecificExtractors_Throws() + { + // Arrange + var defaultExtractor = Substitute.For(); + + // Act +#pragma warning disable CS8625 // Cannot convert null literal to non-nullable reference type. + var ex = Assert.Throws(() => + new ContentExtractorStrategy(_fakeLogger, null, defaultExtractor)); +#pragma warning restore CS8625 // Cannot convert null literal to non-nullable reference type. + + // Act + ex.Message.ShouldBe("Value cannot be null. 
(Parameter 'specificExtractors')"); + } +} diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/Raw/FlexibleStringConverterTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/Raw/FlexibleStringConverterTests.cs new file mode 100644 index 0000000..fbef272 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/Raw/FlexibleStringConverterTests.cs @@ -0,0 +1,210 @@ +using System.Text.Json; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit.Client.Raw; + +public class FlexibleStringConverterTests +{ + private readonly JsonSerializerOptions _optionsWithConverter = new() + { + Converters = { new Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw.FlexibleStringConverter() } + }; + + [Fact] + public void Read_NumericValue_ReturnsStringRepresentation() + { + // Arrange + var numericJson = "123456"; + + // Act + var result = JsonSerializer.Deserialize(numericJson, _optionsWithConverter); + + // Assert + result.ShouldBe("123456"); + } + + [Fact] + public void Read_StringValue_ReturnsString() + { + // Arrange + var stringJson = "\"test123\""; + + // Act + var result = JsonSerializer.Deserialize(stringJson, _optionsWithConverter); + + // Assert + result.ShouldBe("test123"); + } + + [Fact] + public void Read_NullValue_ReturnsNull() + { + // Arrange + var nullJson = "null"; + + // Act + var result = JsonSerializer.Deserialize(nullJson, _optionsWithConverter); + + // Assert + result.ShouldBeNull(); + } + + [Fact] + public void Read_LargeIntegerValue_ReturnsStringRepresentation() + { + // Arrange + var largeIntegerJson = Int64.MaxValue.ToString(); + + // Act + var result = JsonSerializer.Deserialize(largeIntegerJson, _optionsWithConverter); + + // Assert + result.ShouldBe(Int64.MaxValue.ToString()); + } + + [Fact] + public void Read_BooleanValue_ThrowsJsonException() + { + // Arrange + var booleanJson = "true"; + + // 
Act & Assert + var exception = Should.Throw(() => JsonSerializer.Deserialize(booleanJson, _optionsWithConverter)); + exception.Message.ShouldBe("Cannot convert True to string"); + } + + [Fact] + public void Read_WithGalleryItemModel_HandlesNumericId() + { + // Arrange + var galleryItemJson = """ + { + "media_id": "abc123", + "id": 456789 + } + """; + + // Act + var result = JsonSerializer.Deserialize(galleryItemJson); + + // Assert + result.ShouldNotBeNull(); + result.MediaId.ShouldBe("abc123"); + result.Id.ShouldBe("456789"); + } + + [Fact] + public void Read_WithRedditPostStructure_HandlesGalleryDataWithNumericIds() + { + var redditPostWithNumericGalleryDataIds = """ + [ + { + "kind": "Listing", + "data": { + "children": [ + { + "kind": "t3", + "data": { + "id": "1nzkay2", + "title": "Test Post", + "is_gallery": true, + "gallery_data": { + "items": [ + { + "media_id": "abc123", + "id": 456789 + }, + { + "media_id": "def456", + "id": 789012 + } + ] + } + } + } + ] + } + } + ] + """; + + // Act + var result = JsonSerializer.Deserialize(redditPostWithNumericGalleryDataIds); + + // Assert + result.ShouldNotBeNull(); + result.Count.ShouldBe(1); + + var postData = result[0].Data.Children[0].Data; + postData.Id.ShouldBe("1nzkay2"); + postData.IsGallery.ShouldBeTrue(); + postData.GalleryData.ShouldNotBeNull(); + postData.GalleryData.Items.ShouldNotBeNull(); + postData.GalleryData.Items.Count.ShouldBe(2); + postData.GalleryData.Items[0].Id.ShouldBe("456789"); + postData.GalleryData.Items[1].Id.ShouldBe("789012"); + postData.GalleryData.Items[0].MediaId.ShouldBe("abc123"); + postData.GalleryData.Items[1].MediaId.ShouldBe("def456"); + } + + [Fact] + public void Write_StringValue_WritesStringValue() + { + // Arrange + var value = "test123"; + + // Act + var result = JsonSerializer.Serialize(value, _optionsWithConverter); + + // Assert + result.ShouldBe("\"test123\""); + } + + [Fact] + public void Write_NullValue_WritesNull() + { + // Arrange + string? 
value = null; + + // Act + var result = JsonSerializer.Serialize(value, _optionsWithConverter); + + // Assert + result.ShouldBe("null"); + } + + [Fact] + public void Read_DirectNull_CallsConverter() + { + // Arrange + var converter = new Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw.FlexibleStringConverter(); + var options = new JsonSerializerOptions(); + var jsonUtf8 = "null"u8.ToArray(); + var reader = new Utf8JsonReader(jsonUtf8); + reader.Read(); // Position the reader on the null token + + // Act + var result = converter.Read(ref reader, typeof(string), options); + + // Assert + result.ShouldBeNull(); + } + + [Fact] + public void Write_DirectNull_CallsConverter() + { + // Arrange + var converter = new Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw.FlexibleStringConverter(); + var options = new JsonSerializerOptions(); + using var stream = new MemoryStream(); + using var writer = new Utf8JsonWriter(stream); + + // Act + converter.Write(writer, null, options); + writer.Flush(); + + // Assert + var json = System.Text.Encoding.UTF8.GetString(stream.ToArray()); + json.ShouldBe("null"); + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RawNewInSubredditTransformerTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RawNewInSubredditTransformerTests.cs new file mode 100644 index 0000000..e81949b --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RawNewInSubredditTransformerTests.cs @@ -0,0 +1,406 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using NSubstitute; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit.Client; + +public class RawNewInSubredditTransformerTests +{ + private readonly IRedditPostClient _redditPostClient; + private readonly RawNewInSubredditTransformer _transformer; + + public 
RawNewInSubredditTransformerTests() + { + _redditPostClient = Substitute.For(); + _transformer = new RawNewInSubredditTransformer(_redditPostClient); + } + + [Fact] + public async Task Transform_ValidRawNewInSubreddit_ReturnsExpectedStructure() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = + [ + new RawChild + { + Data = new RawPostData + { + Id = "post1", + Title = "Test Post 1", + Author = "author1" + } + }, + new RawChild + { + Data = new RawPostData + { + Id = "post2", + Title = "Test Post 2", + Author = "author2" + } + } + ] + } + }; + + var redditPost1 = new RedditPost + { + Post = new RedditPostContent + { + Id = "post1", + Title = "Test Post 1", + Author = "author1", + Score = 100, + Content = "Content 1", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + }, + Comments = [] + }; + + var redditPost2 = new RedditPost + { + Post = new RedditPostContent + { + Id = "post2", + Title = "Test Post 2", + Author = "author2", + Score = 200, + Content = "Content 2", + CreatedUtc = new DateTime(2025, 1, 1, 13, 0, 0, DateTimeKind.Utc) + }, + Comments = [] + }; + + _redditPostClient.GetPost("post1").Returns(Task.FromResult(redditPost1)); + _redditPostClient.GetPost("post2").Returns(Task.FromResult(redditPost2)); + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(2); + + var firstPost = result.Posts[0]; + firstPost.Post.Id.ShouldBe("post1"); + firstPost.Post.Title.ShouldBe("Test Post 1"); + firstPost.Post.Author.ShouldBe("author1"); + firstPost.Post.Score.ShouldBe(100); + firstPost.Post.Content.ShouldBe("Content 1"); + + var secondPost = result.Posts[1]; + secondPost.Post.Id.ShouldBe("post2"); + secondPost.Post.Title.ShouldBe("Test Post 2"); + secondPost.Post.Author.ShouldBe("author2"); + secondPost.Post.Score.ShouldBe(200); + 
secondPost.Post.Content.ShouldBe("Content 2"); + } + + [Fact] + public async Task Transform_EmptyChildren_ReturnsEmptyNewInSubreddit() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = [] + } + }; + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(0); + } + + [Fact] + public async Task Transform_NullChildren_ReturnsEmptyNewInSubreddit() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = null + } + }; + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(0); + } + + [Fact] + public async Task Transform_NullData_ReturnsEmptyNewInSubreddit() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = null + }; + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(0); + } + + [Fact] + public async Task Transform_ChildrenWithNullData_SkipsNullDataChildren() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = + [ + new RawChild + { + Data = new RawPostData + { + Id = "post1", + Title = "Valid Post" + } + }, + new RawChild + { + Data = null + }, + new RawChild + { + Data = new RawPostData + { + Id = "post2", + Title = "Another Valid Post" + } + } + ] + } + }; + + var redditPost1 = new RedditPost + { + Post = new RedditPostContent { Id = "post1", Title = "Valid Post" }, + Comments = [] + }; + + var redditPost2 = new RedditPost + { + Post = new RedditPostContent { Id = "post2", Title = "Another Valid Post" }, + Comments = [] + }; + + 
_redditPostClient.GetPost("post1").Returns(Task.FromResult(redditPost1)); + _redditPostClient.GetPost("post2").Returns(Task.FromResult(redditPost2)); + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(2); + result.Posts[0].Post.Id.ShouldBe("post1"); + result.Posts[1].Post.Id.ShouldBe("post2"); + } + + [Fact] + public async Task Transform_ChildrenWithNullIds_SkipsNullIdChildren() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = + [ + new RawChild + { + Data = new RawPostData + { + Id = "post1", + Title = "Valid Post" + } + }, + new RawChild + { + Data = new RawPostData + { + Id = null, // This should be skipped + Title = "Post with null ID" + } + }, + new RawChild + { + Data = new RawPostData + { + Id = "post2", + Title = "Another Valid Post" + } + } + ] + } + }; + + var redditPost1 = new RedditPost + { + Post = new RedditPostContent { Id = "post1", Title = "Valid Post" }, + Comments = [] + }; + + var redditPost2 = new RedditPost + { + Post = new RedditPostContent { Id = "post2", Title = "Another Valid Post" }, + Comments = [] + }; + + _redditPostClient.GetPost("post1").Returns(Task.FromResult(redditPost1)); + _redditPostClient.GetPost("post2").Returns(Task.FromResult(redditPost2)); + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(2); + result.Posts[0].Post.Id.ShouldBe("post1"); + result.Posts[1].Post.Id.ShouldBe("post2"); + } + + [Fact] + public async Task Transform_SinglePost_ReturnsNewInSubredditWithOnePost() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = + [ + new RawChild + { + Data = new RawPostData + { + Id = "single_post", + Title = "Single Test Post", + Author = "single_author", + SelfText = "This is a single post", + Url = 
"https://reddit.com/r/test/single_post" + } + } + ] + } + }; + + var redditPost = new RedditPost + { + Post = new RedditPostContent + { + Id = "single_post", + Title = "Single Test Post", + Author = "single_author", + Content = "This is a single post", + Score = 42, + Subreddit = "test", + CreatedUtc = new DateTime(2025, 1, 1, 14, 0, 0, DateTimeKind.Utc), + ImageUrl = "https://example.com/image.jpg" + }, + Comments = + [ + new RedditComment + { + Id = "comment1", + Author = "commenter", + Content = "Great post!", + Score = 5, + CreatedUtc = new DateTime(2025, 1, 1, 14, 30, 0, DateTimeKind.Utc), + Replies = [] + } + ] + }; + + _redditPostClient.GetPost("single_post").Returns(Task.FromResult(redditPost)); + + // Act + var result = await _transformer.Transform(rawNewInSubreddit); + + // Assert + result.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(1); + + var post = result.Posts[0]; + post.Post.Id.ShouldBe("single_post"); + post.Post.Title.ShouldBe("Single Test Post"); + post.Post.Author.ShouldBe("single_author"); + post.Post.Content.ShouldBe("This is a single post"); + post.Post.Score.ShouldBe(42); + post.Post.Subreddit.ShouldBe("test"); + post.Post.ImageUrl.ShouldBe("https://example.com/image.jpg"); + post.Comments.Count.ShouldBe(1); + post.Comments[0].Content.ShouldBe("Great post!"); + } + + [Fact] + public async Task Transform_NullRawNewInSubreddit_ThrowsArgumentNullException() + { + // Act & Assert + var exception = await Should.ThrowAsync(() => _transformer.Transform(null!)); + exception.ParamName.ShouldBe("rawNewInSubreddit"); + } + + [Fact] + public async Task Transform_ConcurrentPostFetching_CallsClientConcurrently() + { + // Arrange + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = + [ + new RawChild { Data = new RawPostData { Id = "post1" } }, + new RawChild { Data = new RawPostData { Id = "post2" } }, + new RawChild { Data = new RawPostData { Id = "post3" } } + ] + } + }; + + var tcs1 = new 
TaskCompletionSource(); + var tcs2 = new TaskCompletionSource(); + var tcs3 = new TaskCompletionSource(); + + _redditPostClient.GetPost("post1").Returns(tcs1.Task); + _redditPostClient.GetPost("post2").Returns(tcs2.Task); + _redditPostClient.GetPost("post3").Returns(tcs3.Task); + + // Act + var transformTask = _transformer.Transform(rawNewInSubreddit); + + // Complete the tasks + tcs1.SetResult(new RedditPost { Post = new RedditPostContent { Id = "post1" }, Comments = [] }); + tcs2.SetResult(new RedditPost { Post = new RedditPostContent { Id = "post2" }, Comments = [] }); + tcs3.SetResult(new RedditPost { Post = new RedditPostContent { Id = "post3" }, Comments = [] }); + + var result = await transformTask; + + // Assert + result.Posts.Count.ShouldBe(3); + result.Posts.Select(p => p.Post.Id).ShouldBe(["post1", "post2", "post3"]); + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RawRedditPostTransformerTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RawRedditPostTransformerTests.cs new file mode 100644 index 0000000..41f8371 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RawRedditPostTransformerTests.cs @@ -0,0 +1,435 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit.Client; + +public class RawRedditPostTransformerTests +{ + private readonly RawRedditPostTransformer _transformer = new(); + + [Fact] + public void Transform_ValidRedditPost_ReturnsExpectedStructure() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Test Post Title", + Author = "testuser", + Subreddit = 
"testsubreddit", + Score = 100, + SelfText = "This is test content", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "comment123", + Author = "commenter", + Body = "This is a comment", + Score = 50, + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = new RawRedditListing + { + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "reply123", + Author = "replier", + Body = "This is a reply", + Score = 25, + CreatedUtc = new DateTime(2025, 1, 1, 13, 0, 0, DateTimeKind.Utc), + Replies = new RawRedditListing + { + Data = new RawRedditListingData + { + Children = [] + } + } + } + } + ] + } + } + } + } + ] + } + } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.ShouldNotBeNull(); + + result.Post.Id.ShouldBe("test123"); + result.Post.Title.ShouldBe("Test Post Title"); + result.Post.Author.ShouldBe("testuser"); + result.Post.Subreddit.ShouldBe("testsubreddit"); + result.Post.Score.ShouldBe(100); + result.Post.Content.ShouldBe("This is test content"); + result.Post.CreatedUtc.ShouldBe(new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc)); + + result.Post.ImageUrl.ShouldBeNull(); + + result.Comments.Count.ShouldBe(1); + var comment = result.Comments[0]; + comment.Id.ShouldBe("comment123"); + comment.Author.ShouldBe("commenter"); + comment.Content.ShouldBe("This is a comment"); + comment.Score.ShouldBe(50); + comment.CreatedUtc.ShouldBe(new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc)); + + comment.Replies.Count.ShouldBe(1); + var reply = comment.Replies[0]; + reply.Id.ShouldBe("reply123"); + reply.Author.ShouldBe("replier"); + reply.Content.ShouldBe("This is a reply"); + 
reply.Score.ShouldBe(25); + reply.CreatedUtc.ShouldBe(new DateTime(2025, 1, 1, 13, 0, 0, DateTimeKind.Utc)); + reply.Replies.Count.ShouldBe(0); + } + + [Fact] + public void Transform_PostWithDirectImageUrl_ExtractsImageCorrectly() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Image Post", + Author = "testuser", + Url = "https://i.redd.it/example.jpg", + CreatedUtc = DateTime.UtcNow + } + } + ] + } + }, + new RawRedditListing { Data = new RawRedditListingData { Children = [] } } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.Post.ImageUrl.ShouldBe("https://i.redd.it/example.jpg"); + } + + [Fact] + public void Transform_PostWithPreviewImage_ExtractsImageCorrectly() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Preview Image Post", + Author = "testuser", + Preview = new RawRedditPreview + { + Enabled = true, + Images = + [ + new RawRedditPreviewImage + { + Source = new RawRedditImageSource + { + Url = "https://preview.redd.it/example.jpg", + Width = 800, + Height = 600 + } + } + ] + }, + CreatedUtc = DateTime.UtcNow + } + } + ] + } + }, + new RawRedditListing { Data = new RawRedditListingData { Children = [] } } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.Post.ImageUrl.ShouldBe("https://preview.redd.it/example.jpg"); + } + + [Fact] + public void Transform_PostWithGallery_ExtractsFirstImageCorrectly() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + 
Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Gallery Post", + Author = "testuser", + IsGallery = true, + GalleryData = new RawRedditGalleryData + { + Items = + [ + new RawRedditGalleryItem { MediaId = "img1" }, + new RawRedditGalleryItem { MediaId = "img2" } + ] + }, + MediaMetadata = new Dictionary + { + ["img1"] = new RawRedditMediaMetadata + { + Status = "valid", + Source = new RawRedditImageSource + { + Url = "https://i.redd.it/gallery1.jpg", + Width = 1000, + Height = 800 + } + }, + ["img2"] = new RawRedditMediaMetadata + { + Status = "valid", + Source = new RawRedditImageSource + { + Url = "https://i.redd.it/gallery2.jpg", + Width = 800, + Height = 600 + } + } + }, + CreatedUtc = DateTime.UtcNow + } + } + ] + } + }, + new RawRedditListing { Data = new RawRedditListingData { Children = [] } } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.Post.ImageUrl.ShouldBe("https://i.redd.it/gallery1.jpg"); + } + + [Fact] + public void Transform_PostWithThumbnailOnly_ExtractsThumbnailCorrectly() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Thumbnail Post", + Author = "testuser", + Thumbnail = "https://b.thumbs.redditmedia.com/thumb.jpg", + CreatedUtc = DateTime.UtcNow + } + } + ] + } + }, + new RawRedditListing { Data = new RawRedditListingData { Children = [] } } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.Post.ImageUrl.ShouldBe("https://b.thumbs.redditmedia.com/thumb.jpg"); + } + + [Fact] + public void Transform_PostWithMultipleImageSources_PrioritizesCorrectly() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new 
RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Multi-source Image Post", + Author = "testuser", + Url = "https://i.redd.it/direct.jpg", + Thumbnail = "https://b.thumbs.redditmedia.com/thumb.jpg", + Preview = new RawRedditPreview + { + Enabled = true, + Images = + [ + new RawRedditPreviewImage + { + Source = new RawRedditImageSource + { + Url = "https://preview.redd.it/preview.jpg", + Width = 800, + Height = 600 + } + } + ] + }, + CreatedUtc = DateTime.UtcNow + } + } + ] + } + }, + new RawRedditListing { Data = new RawRedditListingData { Children = [] } } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.Post.ImageUrl.ShouldBe("https://preview.redd.it/preview.jpg"); + } + + [Fact] + public void Transform_PostWithInvalidThumbnails_IgnoresInvalidThumbnails() + { + // Arrange + var redditPost = new RawRedditPost + { + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Invalid Thumbnail Post", + Author = "testuser", + Thumbnail = "self", + CreatedUtc = DateTime.UtcNow + } + } + ] + } + }, + new RawRedditListing { Data = new RawRedditListingData { Children = [] } } + }; + + // Act + var result = _transformer.Transform(redditPost); + + // Assert + result.Post.ImageUrl.ShouldBeNull(); + } + + [Fact] + public void Transform_EmptyRedditPost_ThrowsArgumentException() + { + // Arrange + var redditPost = new RawRedditPost(); + + // Act & Assert + Should.Throw(() => _transformer.Transform(redditPost)) + .Message.ShouldContain("Reddit post must have at least 2 listings"); + } + + [Fact] + public void Transform_NullRawRedditPost_ThrowsArgumentNullException() + { + // Act & Assert + Should.Throw(() => _transformer.Transform(null!)) + .ParamName.ShouldBe("rawRedditPost"); + } +} \ No 
newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RedditDateTimeConverterTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RedditDateTimeConverterTests.cs new file mode 100644 index 0000000..ed1928c --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RedditDateTimeConverterTests.cs @@ -0,0 +1,141 @@ +using System.Text.Json; +using System.Text.Json.Serialization; +using System.Globalization; +using Shouldly; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit.Client; + +public class RedditDateTimeConverterTests +{ + private readonly JsonSerializerOptions _options; + + public RedditDateTimeConverterTests() + { + _options = new JsonSerializerOptions + { + Converters = { new RedditDateTimeConverter() } + }; + } + + [Theory] + [InlineData(1747678685, "2025-05-19T18:18:05Z")] + [InlineData(1747678685.0, "2025-05-19T18:18:05Z")] + public void Read_ValidUnixTimestamp_ReturnsExpectedDateTime(object timestamp, string expectedUtc) + { + // Arrange + var wrappedJson = JsonSerializer + .Serialize(new { created_utc = timestamp }); + + // Act + var result = JsonSerializer.Deserialize(wrappedJson, _options); + + // Assert + result.ShouldNotBeNull(); + result!.Date.ShouldBe(DateTime + .Parse(expectedUtc, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal)); + } + + [Fact] + public void Read_InvalidToken_ThrowsJsonException() + { + // Arrange + var json = "{\"created_utc\": \"not_a_number\"}"; + + // Act & Assert + Should.Throw(() => + JsonSerializer.Deserialize(json, _options)); + } + + [Fact] + public void Write_UtcDateTime_WritesCorrectUnixTimestamp() + { + // Arrange + var testDate = new TestDate + { + Date = new DateTime(2025, 5, 19, 18, 18, 5, DateTimeKind.Utc) + }; + + // Act + var json = JsonSerializer.Serialize(testDate, 
_options); + + // Assert + json.ShouldContain("\"created_utc\":1747678685"); + } + + [Fact] + public void Write_LocalDateTime_ConvertsToUtcAndWritesCorrectUnixTimestamp() + { + // Arrange + var localTime = new DateTime(2025, 5, 19, 18, 18, 5, DateTimeKind.Local); + var expectedUtcTime = localTime.ToUniversalTime(); + var expectedUnixSeconds = new DateTimeOffset(expectedUtcTime).ToUnixTimeSeconds(); + + var testDate = new TestDate { Date = localTime }; + + // Act + var json = JsonSerializer.Serialize(testDate, _options); + + // Assert + json.ShouldContain($"\"created_utc\":{expectedUnixSeconds}"); + } + + [Fact] + public void Write_UnspecifiedDateTime_TreatsAsUtcAndWritesCorrectUnixTimestamp() + { + // Arrange + var unspecifiedTime = new DateTime(2025, 5, 19, 18, 18, 5, DateTimeKind.Unspecified); + // When DateTimeKind.Unspecified, it's treated as UTC directly (SpecifyKind to UTC) + var utcTime = DateTime.SpecifyKind(unspecifiedTime, DateTimeKind.Utc); + var expectedUnixSeconds = new DateTimeOffset(utcTime).ToUnixTimeSeconds(); + + var testDate = new TestDate { Date = unspecifiedTime }; + + // Act + var json = JsonSerializer.Serialize(testDate, _options); + + // Assert + json.ShouldContain($"\"created_utc\":{expectedUnixSeconds}"); + } + + [Theory] + [InlineData(DateTimeKind.Utc)] + [InlineData(DateTimeKind.Local)] + [InlineData(DateTimeKind.Unspecified)] + public void Write_AllDateTimeKinds_ProducesValidUnixTimestamp(DateTimeKind kind) + { + // Arrange + var baseTime = new DateTime(2025, 5, 19, 18, 18, 5, DateTimeKind.Unspecified); + var dateTime = kind switch + { + DateTimeKind.Utc => DateTime.SpecifyKind(baseTime, DateTimeKind.Utc), + DateTimeKind.Local => DateTime.SpecifyKind(baseTime, DateTimeKind.Local), + DateTimeKind.Unspecified => DateTime.SpecifyKind(baseTime, DateTimeKind.Unspecified), + _ => baseTime + }; + + var testDate = new TestDate { Date = dateTime }; + + // Act + var json = JsonSerializer.Serialize(testDate, _options); + + // Assert + 
json.ShouldNotBeNull(); + json.ShouldContain("\"created_utc\":"); + + // Extract the timestamp and verify it's a valid number + var startIndex = json.IndexOf("\"created_utc\":") + "\"created_utc\":".Length; + var endIndex = json.IndexOf('}', startIndex); + var timestampStr = json[startIndex..endIndex]; + + long.TryParse(timestampStr, out var timestamp).ShouldBeTrue(); + timestamp.ShouldBeGreaterThan(0); + } + + private class TestDate + { + [JsonPropertyName("created_utc")] + [JsonConverter(typeof(RedditDateTimeConverter))] + public DateTime Date { get; set; } + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RedditPostClientTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RedditPostClientTests.cs new file mode 100644 index 0000000..35a8988 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/RedditPostClientTests.cs @@ -0,0 +1,789 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; +using NSubstitute; +using Shouldly; +using System.Text.Json; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit.Client; + +public class RedditPostClientTests +{ + private readonly IRawRedditPostClient _mockRawClient; + private readonly IRawRedditPostTransformer _mockTransformer; + private readonly RedditPostClient _client; + + public RedditPostClientTests() + { + _mockRawClient = Substitute.For(); + _mockTransformer = Substitute.For(); + _client = new RedditPostClient(_mockRawClient, _mockTransformer); + } + + [Fact] + public async Task GetPost_ValidRedditPost_ReturnsTransformedPost() + { + // Arrange + var postId = "1kqiwzc"; + var rawRedditPost = CreateValidRawRedditPost(); + var expectedResult = CreateExpectedTransformedResult(); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + 
_mockTransformer.Transform(rawRedditPost).Returns(expectedResult); + + // Act + var result = await _client.GetPost(postId); + + // Assert + result.ShouldNotBeNull(); + + result.Post.ShouldNotBeNull(); + result.Post.Id.ShouldBe("test123"); + result.Post.Title.ShouldBe("Test Post Title"); + result.Post.Author.ShouldBe("testuser"); + result.Post.Subreddit.ShouldBe("testsubreddit"); + result.Post.Score.ShouldBe(100); + result.Post.Content.ShouldBe("This is test content"); + result.Post.CreatedUtc.ShouldBe(new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc)); + + + result.Comments.ShouldNotBeNull(); + result.Comments.Count.ShouldBe(1); + + var comment = result.Comments[0]; + comment.Id.ShouldBe("comment123"); + comment.Author.ShouldBe("commenter"); + comment.Content.ShouldBe("This is a comment"); + comment.Score.ShouldBe(50); + comment.CreatedUtc.ShouldBe(new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc)); + + + comment.Replies.Count.ShouldBe(1); + var reply = comment.Replies[0]; + reply.Id.ShouldBe("reply123"); + reply.Author.ShouldBe("replier"); + reply.Content.ShouldBe("This is a reply"); + reply.Score.ShouldBe(25); + reply.Replies.Count.ShouldBe(0); + + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_PostWithEmptyStringReplies_HandlesGracefully() + { + // Arrange + var postId = "test456"; + var rawRedditPost = CreateRawRedditPostWithEmptyStringReplies(); + var expectedResult = CreateExpectedResultWithEmptyReplies(); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + _mockTransformer.Transform(rawRedditPost).Returns(expectedResult); + + // Act + var result = await _client.GetPost(postId); + + // Assert + result.Comments.Count.ShouldBe(1); + result.Comments[0].Replies.Count.ShouldBe(0, "empty string replies should result in empty list"); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = 
_mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_PostWithNullReplies_HandlesGracefully() + { + // Arrange + var postId = "test789"; + var rawRedditPost = CreateRawRedditPostWithNullReplies(); + var expectedResult = CreateExpectedResultWithEmptyReplies(); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + _mockTransformer.Transform(rawRedditPost).Returns(expectedResult); + + // Act + var result = await _client.GetPost(postId); + + // Assert + result.Comments.Count.ShouldBe(1); + result.Comments[0].Replies.Count.ShouldBe(0, "null replies should result in empty list"); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_PostWithJsonElementReplies_HandlesGracefully() + { + // Arrange + var postId = "testjson"; + var rawRedditPost = CreateRawRedditPostWithJsonElementReplies(); + var expectedResult = CreateExpectedResultWithEmptyReplies(); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + _mockTransformer.Transform(rawRedditPost).Returns(expectedResult); + + // Act + var result = await _client.GetPost(postId); + + // Assert + result.Comments.Count.ShouldBe(1); + result.Comments[0].Replies.Count.ShouldBe(0, "JsonElement empty string should result in empty list"); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_PostWithMixedCommentTypes_OnlyProcessesComments() + { + // Arrange + var postId = "testmixed"; + var rawRedditPost = CreateRawRedditPostWithMixedCommentTypes(); + var expectedResult = CreateExpectedResultWithSingleComment(); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + _mockTransformer.Transform(rawRedditPost).Returns(expectedResult); + + // Act + var result = await _client.GetPost(postId); + + // Assert + 
result.Comments.Count.ShouldBe(1, "only t1 (comment) types should be processed"); + result.Comments[0].Id.ShouldBe("comment123"); + result.Comments[0].Author.ShouldBe("commenter"); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_PostWithNullFields_HandlesNullsGracefully() + { + // Arrange + var postId = "testnulls"; + var rawRedditPost = CreateRawRedditPostWithNullFields(); + var expectedResult = CreateExpectedResultWithNullFields(); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + _mockTransformer.Transform(rawRedditPost).Returns(expectedResult); + + // Act + var result = await _client.GetPost(postId); + + // Assert + result.Post.Id.ShouldBe(string.Empty, "null ID should become empty string"); + result.Post.Title.ShouldBe("Test Post Title"); + result.Post.Author.ShouldBe(string.Empty, "null Author should become empty string"); + result.Post.Content.ShouldBe(string.Empty, "null Content should become empty string"); + + result.Comments.Count.ShouldBe(1); + result.Comments[0].Id.ShouldBe(string.Empty, "null comment ID should become empty string"); + result.Comments[0].Author.ShouldBe(string.Empty, "null comment Author should become empty string"); + result.Comments[0].Content.ShouldBe(string.Empty, "null comment Content should become empty string"); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_PostWithoutTitle_ThrowsInvalidOperationException() + { + // Arrange + var postId = "notitle"; + var rawRedditPost = CreateRawRedditPostWithoutTitle(); + + _mockRawClient.GetPost(postId).Returns(rawRedditPost); + _mockTransformer.Transform(rawRedditPost).Returns(_ => throw new InvalidOperationException("Reddit post must have a title")); + + // Act & Assert + await Should.ThrowAsync(() => _client.GetPost(postId)); + + _ = 
_mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + [Fact] + public async Task GetPost_EmptyRawPost_ThrowsArgumentException() + { + // Arrange + var postId = "empty"; + var emptyRawPost = new RawRedditPost(); + var emptyRawPostParamName = nameof(emptyRawPost); + + _mockRawClient.GetPost(postId).Returns(emptyRawPost); + _mockTransformer.Transform(emptyRawPost).Returns(_ => + throw new ArgumentException("Reddit post must have at least 2 listings (post and comments)", emptyRawPostParamName)); + + // Act & Assert + await Should.ThrowAsync(() => _client.GetPost(postId)); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(emptyRawPost); + } + + [Fact] + public async Task GetPost_PostWithNoChildren_ThrowsArgumentException() + { + // Arrange + var postId = "nochildren"; + var rawRedditPost = CreateRawRedditPostWithNoChildren(); + var rawRedditPostParamName = nameof(rawRedditPost); + + _mockRawClient.GetPost(postId).Returns(Task.FromResult(rawRedditPost)); + _mockTransformer.Transform(rawRedditPost).Returns(_ => + throw new ArgumentException("Post listing must contain at least one child", rawRedditPostParamName)); + + // Act & Assert + await Should.ThrowAsync(() => _client.GetPost(postId)); + + _ = _mockRawClient.Received(1).GetPost(postId); + _ = _mockTransformer.Received(1).Transform(rawRedditPost); + } + + #region Test Data Factory Methods + + private static RedditPost CreateExpectedTransformedResult() + { + return new RedditPost + { + Post = new RedditPostContent + { + Id = "test123", + Title = "Test Post Title", + Author = "testuser", + Subreddit = "testsubreddit", + Score = 100, + Content = "This is test content", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + }, + Comments = + [ + new() { + Id = "comment123", + Author = "commenter", + Content = "This is a comment", + Score = 50, + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, 
DateTimeKind.Utc), + Replies = + [ + new() { + Id = "reply123", + Author = "replier", + Content = "This is a reply", + Score = 25, + CreatedUtc = new DateTime(2025, 1, 1, 13, 0, 0, DateTimeKind.Utc), + Replies = [] + } + ] + } + ] + }; + } + + private static RedditPost CreateExpectedResultWithEmptyReplies() + { + return new RedditPost + { + Post = new RedditPostContent + { + Id = "test456", + Title = "Test Post Title", + Author = "testuser", + Subreddit = "testsubreddit", + Score = 0, + Content = string.Empty, + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + }, + Comments = + [ + new() { + Id = "comment456", + Author = "commenter", + Content = "This is a comment", + Score = 0, + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = [] + } + ] + }; + } + + private static RedditPost CreateExpectedResultWithSingleComment() + { + return new RedditPost + { + Post = new RedditPostContent + { + Id = "testmixed", + Title = "Test Post Title", + Author = "testuser", + Subreddit = "testsubreddit", + Score = 0, + Content = string.Empty, + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + }, + Comments = + [ + new() { + Id = "comment123", + Author = "commenter", + Content = "This is a comment", + Score = 0, + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = [] + } + ] + }; + } + + private static RedditPost CreateExpectedResultWithNullFields() + { + return new RedditPost + { + Post = new RedditPostContent + { + Id = string.Empty, + Title = "Test Post Title", + Author = string.Empty, + Subreddit = string.Empty, + Score = 0, + Content = string.Empty, + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + }, + Comments = + [ + new() { + Id = string.Empty, + Author = string.Empty, + Content = string.Empty, + Score = 0, + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = [] + } + ] + }; + } + + private static RawRedditPost 
CreateValidRawRedditPost() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test123", + Title = "Test Post Title", + Author = "testuser", + Subreddit = "testsubreddit", + Score = 100, + SelfText = "This is test content", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "comment123", + Author = "commenter", + Body = "This is a comment", + Score = 50, + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = new RawRedditListing + { + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "reply123", + Author = "replier", + Body = "This is a reply", + Score = 25, + CreatedUtc = new DateTime(2025, 1, 1, 13, 0, 0, DateTimeKind.Utc), + Replies = new RawRedditListing + { + Data = new RawRedditListingData + { + Children = [] + } + } + } + } + ] + } + } + } + } + ] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithEmptyStringReplies() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test456", + Title = "Test Post Title", + Author = "testuser", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "comment456", + Author = "commenter", + Body = "This is a comment", + CreatedUtc = new DateTime(2025, 1, 
1, 12, 30, 0, DateTimeKind.Utc), + Replies = "" // Empty string - Reddit API quirk + } + } + ] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithNullReplies() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "test789", + Title = "Test Post Title", + Author = "testuser", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "comment789", + Author = "commenter", + Body = "This is a comment", + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = null // Null replies + } + } + ] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithJsonElementReplies() + { + var emptyStringJson = JsonSerializer.SerializeToElement(""); + + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "testjson", + Title = "Test Post Title", + Author = "testuser", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = "commentjson", + Author = "commenter", + Body = "This is a comment", + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = emptyStringJson // JsonElement with empty string + } + } + ] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithMixedCommentTypes() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new 
RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "testmixed", + Title = "Test Post Title", + Author = "testuser", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", // Comment - should be processed + Data = new RawRedditCommentData + { + Id = "comment123", + Author = "commenter", + Body = "This is a comment", + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = null + } + }, + new RawRedditChild + { + Kind = "t3", // Post - should be ignored + Data = new RawRedditCommentData + { + Id = "post456", + Author = "poster", + Body = "This should be ignored", + CreatedUtc = new DateTime(2025, 1, 1, 12, 35, 0, DateTimeKind.Utc), + Replies = null + } + }, + new RawRedditChild + { + Kind = "more", // More comments - should be ignored + Data = new RawRedditCommentData + { + Id = "more789", + Author = "system", + Body = "Load more comments", + CreatedUtc = new DateTime(2025, 1, 1, 12, 40, 0, DateTimeKind.Utc), + Replies = null + } + } + ] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithNullFields() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = null, // Null ID + Title = "Test Post Title", + Author = null, // Null Author + Subreddit = null, // Null Subreddit + SelfText = null, // Null Content + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t1", + Data = new RawRedditCommentData + { + Id = null, // Null ID + Author = null, // Null Author + Body 
= null, // Null Body + CreatedUtc = new DateTime(2025, 1, 1, 12, 30, 0, DateTimeKind.Utc), + Replies = null + } + } + ] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithoutTitle() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = + [ + new RawRedditChild + { + Kind = "t3", + Data = new RawRedditCommentData + { + Id = "notitle", + Title = null, // No title - should throw + Author = "testuser", + CreatedUtc = new DateTime(2025, 1, 1, 12, 0, 0, DateTimeKind.Utc) + } + } + ] + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = [] + } + } + ]; + } + + private static RawRedditPost CreateRawRedditPostWithNoChildren() + { + return + [ + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = [] // No children - should throw + } + }, + new RawRedditListing + { + Kind = "Listing", + Data = new RawRedditListingData + { + Children = [] + } + } + ]; + } + + #endregion +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/SubredditClientTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/SubredditClientTests.cs new file mode 100644 index 0000000..f113f68 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/Client/SubredditClientTests.cs @@ -0,0 +1,75 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client.Raw; +using NSubstitute; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit.Client; + +public class SubredditClientTests +{ + private readonly IRawSubredditClient _mockRawClient; + private readonly IRawNewInSubredditTransformer _mockTransformer; + private readonly SubredditClient _client; + + public SubredditClientTests() + { + _mockRawClient = 
Substitute.For(); + _mockTransformer = Substitute.For(); + _client = new SubredditClient(_mockRawClient, _mockTransformer); + } + + [Fact] + public async Task GetNewInSubreddit_ValidSubredditName_ReturnsTransformedResult() + { + // Arrange + var subRedditName = "test"; + var rawNewInSubreddit = new RawNewInSubreddit + { + Data = new RawListingData + { + Children = + [ + new RawChild + { + Data = new RawPostData + { + Id = "post1", + Title = "Test Post" + } + } + ] + } + }; + + var expectedResult = new NewInSubreddit + { + Posts = + [ + new RedditPost + { + Post = new RedditPostContent + { + Id = "post1", + Title = "Test Post", + Author = "testuser", + Score = 100 + }, + Comments = [] + } + ] + }; + + _mockRawClient.GetNewInSubreddit(subRedditName).Returns(Task.FromResult(rawNewInSubreddit)); + _mockTransformer.Transform(rawNewInSubreddit).Returns(Task.FromResult(expectedResult)); + + // Act + var result = await _client.GetNewInSubreddit(subRedditName); + + // Assert + result.ShouldNotBeNull(); + result.Posts.ShouldNotBeNull(); + result.Posts.Count.ShouldBe(1); + result.Posts[0].Post.Id.ShouldBe("post1"); + result.Posts[0].Post.Title.ShouldBe("Test Post"); + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/RedditOptionsTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/RedditOptionsTests.cs new file mode 100644 index 0000000..5214963 --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/RedditOptionsTests.cs @@ -0,0 +1,180 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Options; +using Shouldly; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit; + +public class RedditOptionsTests +{ + [Fact] + public void RedditOptions_DefaultBaseAddress_ShouldBeRedditCom() + { + // Arrange & Act 
+ var options = new RedditOptions(); + + // Assert + options.DefaultBaseAddress.ShouldBe("https://www.reddit.com"); + } + + [Fact] + public void RedditOptions_FallbackImageUrl_ShouldBeRedditLogo() + { + // Arrange & Act + var options = new RedditOptions(); + + // Assert + options.FallbackImageUrl.ShouldBe("https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Lockup_Logo.svg"); + } + + [Fact] + public void RedditOptions_AdditionalBaseAddresses_ShouldBeEmptyByDefault() + { + // Arrange & Act + var options = new RedditOptions(); + + // Assert + options.AdditionalBaseAddresses.ShouldBeEmpty(); + } + + [Fact] + public void RedditOptions_AllBaseAddresses_ShouldIncludeDefaultAndAdditional() + { + // Arrange + var options = new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://custom.reddit.com", "https://alt.reddit.instance.com"] + }; + + // Act + var allAddresses = options.AllBaseAddresses.ToList(); + + // Assert + allAddresses.ShouldBe(["https://www.reddit.com", "https://custom.reddit.com", "https://alt.reddit.instance.com"]); + } + + [Fact] + public void RedditOptions_AllDomains_ShouldExtractDomainsFromValidUrls() + { + // Arrange + var options = new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://custom.reddit.com", "https://alt.reddit.instance.com"] + }; + + // Act + var allDomains = options.AllDomains.ToList(); + + // Assert + allDomains.ShouldBe(["www.reddit.com", "custom.reddit.com", "alt.reddit.instance.com"]); + } + + [Fact] + public void RedditOptions_DefaultConfiguration_ShouldIncludeBothWwwAndNonWwwReddit() + { + // Arrange & Act + var options = new RedditOptions(); + var allDomains = options.AllDomains.ToList(); + + // Assert + allDomains.ShouldBe(["www.reddit.com", "reddit.com"]); + } + + [Fact] + public void RedditOptions_AllDomains_ShouldSkipInvalidUrls() + { + // Arrange + var options = new RedditOptions + { + DefaultBaseAddress = 
"https://www.reddit.com", + AdditionalBaseAddresses = ["https://custom.reddit.com", "not-a-valid-url", "https://alt.reddit.instance.com"] + }; + + // Act + var allDomains = options.AllDomains.ToList(); + + // Assert + allDomains.ShouldBe(["www.reddit.com", "custom.reddit.com", "alt.reddit.instance.com"]); + } + + [Fact] + public void RedditOptions_ConfigurationBinding_ShouldOverrideDefault() + { + // Arrange + var configurationData = new Dictionary + { + { "Reddit:DefaultBaseAddress", "https://custom.reddit.com" } + }; + + var configuration = new ConfigurationBuilder() + .AddInMemoryCollection(configurationData) + .Build(); + + var services = new ServiceCollection(); + services.AddOptions() + .Bind(configuration.GetSection("Reddit")); + + var serviceProvider = services.BuildServiceProvider(); + + // Act + var redditOptions = serviceProvider.GetRequiredService>().Value; + + // Assert + redditOptions.DefaultBaseAddress.ShouldBe("https://custom.reddit.com"); + } + + [Fact] + public void RedditOptions_ConfigurationBinding_ShouldBindAdditionalBaseAddresses() + { + // Arrange + var configurationData = new Dictionary + { + { "Reddit:DefaultBaseAddress", "https://www.reddit.com" }, + { "Reddit:AdditionalBaseAddresses:0", "https://custom.reddit.com" }, + { "Reddit:AdditionalBaseAddresses:1", "https://alt.reddit.instance.com" } + }; + + var configuration = new ConfigurationBuilder() + .AddInMemoryCollection(configurationData) + .Build(); + + var services = new ServiceCollection(); + services.AddOptions() + .Bind(configuration.GetSection("Reddit")); + + var serviceProvider = services.BuildServiceProvider(); + + // Act + var redditOptions = serviceProvider.GetRequiredService>().Value; + + // Assert + redditOptions.DefaultBaseAddress.ShouldBe("https://www.reddit.com"); + // Configuration binding replaces the default additional addresses + redditOptions.AdditionalBaseAddresses.ShouldBe(["https://custom.reddit.com", "https://alt.reddit.instance.com"]); + } + + [Fact] + public 
void RedditOptions_EmptyConfiguration_ShouldUseDefault() + { + // Arrange + var configuration = new ConfigurationBuilder().Build(); + + var services = new ServiceCollection(); + services.AddOptions() + .Bind(configuration.GetSection("Reddit")); + + var serviceProvider = services.BuildServiceProvider(); + + // Act + var redditOptions = serviceProvider.GetRequiredService>().Value; + + // Assert + redditOptions.DefaultBaseAddress.ShouldBe("https://www.reddit.com"); + redditOptions.AdditionalBaseAddresses.ShouldBeEmpty(); + // But AllDomains should still include the default reddit.com + redditOptions.AllDomains.ShouldBe(["www.reddit.com", "reddit.com"]); + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/RedditPostContentExtractorTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/RedditPostContentExtractorTests.cs new file mode 100644 index 0000000..6d51bad --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/RedditPostContentExtractorTests.cs @@ -0,0 +1,378 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Microsoft.Extensions.Options; +using NSubstitute; +using Shouldly; +using System.Text.Json; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit +{ + public class RedditPostContentExtractorTests + { + private readonly IRedditPostClient _mockRedditPostClient; + private readonly ISubredditImageExtractor _mockSubredditImageExtractor; + private readonly IOptions _mockRedditOptions; + private readonly RedditPostContentExtractor _extractor; + + public RedditPostContentExtractorTests() + { + _mockRedditPostClient = Substitute.For(); + _mockSubredditImageExtractor = Substitute.For(); + _mockRedditOptions = Substitute.For>(); + _mockRedditOptions.Value.Returns(new RedditOptions()); + + _extractor = new 
RedditPostContentExtractor(_mockRedditPostClient, _mockSubredditImageExtractor, _mockRedditOptions); + } + + [Theory] + [InlineData("https://reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://reddit.com/r/programming/comments/abc123/title/")] + [InlineData("https://www.reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://www.reddit.com/r/programming/comments/abc123/title/")] + [InlineData("https://reddit.com/r/programming/comments/abc123")] + [InlineData("https://reddit.com/r/programming/comments/abc123/")] + [InlineData("https://www.reddit.com/r/programming/comments/abc123")] + [InlineData("https://www.reddit.com/r/programming/comments/abc123/")] + [InlineData("hTTpS://rEDDiT.cOm/R/pRoGrAmMiNg/CoMmEnTs/AbC123/TiTlE")] + [InlineData("hTTpS://rEDDiT.cOm/R/pRoGrAmMiNg/CoMmEnTs/AbC123")] + public void CanHandle_ValidRedditPostUrl_ReturnsTrue(string url) + { + // Act + var canHandle = _extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeTrue(); + } + + [Theory] + [InlineData("not-a-url")] + [InlineData("https://reddit.com")] + [InlineData("https://reddit.com/r")] + [InlineData("https://reddit.com/r/programming")] + [InlineData("https://reddit.com/r/programming/comments")] + [InlineData("https://reddit.com/r/programming/comments/abc123/title/extra")] + [InlineData("https://not-reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://www2.reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://reddit.com/r/programming/posts/abc123/title")] + [InlineData("https://reddit.com/user/username/comments/abc123/title")] + [InlineData("https://reddit.com/r/programming/comments/abc123/title/extra/segment")] + public void CanHandle_InvalidRedditPostUrl_ReturnsFalse(string url) + { + // Act + var canHandle = _extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeFalse(); + } + + [Theory] + [InlineData("https://custom.reddit.com/r/programming/comments/abc123/title")] + 
[InlineData("https://alt.reddit.instance.com/r/programming/comments/abc123/title")] + public void CanHandle_CustomRedditInstance_ReturnsTrue(string url) + { + // Arrange + var customOptions = new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://reddit.com", "https://custom.reddit.com", "https://alt.reddit.instance.com"] + }; + _mockRedditOptions.Value.Returns(customOptions); + var extractor = new RedditPostContentExtractor(_mockRedditPostClient, _mockSubredditImageExtractor, _mockRedditOptions); + + // Act + var canHandle = extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeTrue(); + } + + [Theory] + [InlineData("https://unknown.reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://www.unknown.reddit.com/r/programming/comments/abc123/title")] + public void CanHandle_UnknownRedditInstance_ReturnsFalse(string url) + { + // Arrange + var customOptions = new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://reddit.com", "https://custom.reddit.com"] + }; + _mockRedditOptions.Value.Returns(customOptions); + var extractor = new RedditPostContentExtractor(_mockRedditPostClient, _mockSubredditImageExtractor, _mockRedditOptions); + + // Act + var canHandle = extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeFalse(); + } + + [Theory] + [InlineData("https://www.reddit.com/r/programming/comments/abc123/title")] + [InlineData("https://www.reddit.com/r/programming/comments/abc123")] + public async Task ExtractAsync_ValidUrl_CallsRedditPostClientWithCorrectPostId(string url) + { + // Arrange + var testPost = CreateTestRedditPost("abc123", "Test Title", "https://example.com/image.jpg"); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + + // Act + await _extractor.ExtractAsync(url); + + // Assert + await _mockRedditPostClient.Received(1).GetPost("abc123"); + } + + [Fact] + public async Task 
ExtractAsync_PostWithImage_ReturnsExtractWithPostImage() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var postImageUrl = "https://i.redd.it/post-image.jpg"; + var testPost = CreateTestRedditPost("abc123", "Test Title", postImageUrl); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(postImageUrl); + await _mockSubredditImageExtractor.DidNotReceive().GetSubredditImageUrlAsync(Arg.Any()); + } + + [Fact] + public async Task ExtractAsync_PostWithoutImage_UsesSubredditFallbackImage() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var subredditImageUrl = "https://styles.redditmedia.com/programming-icon.png"; + var testPost = CreateTestRedditPost("abc123", "Test Title", null); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + _mockSubredditImageExtractor.GetSubredditImageUrlAsync("programming").Returns(subredditImageUrl); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(subredditImageUrl); + await _mockSubredditImageExtractor.Received(1).GetSubredditImageUrlAsync("programming"); + } + + [Fact] + public async Task ExtractAsync_PostWithEmptyImage_UsesSubredditFallbackImage() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var subredditImageUrl = "https://styles.redditmedia.com/programming-icon.png"; + var testPost = CreateTestRedditPost("abc123", "Test Title", ""); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + _mockSubredditImageExtractor.GetSubredditImageUrlAsync("programming").Returns(subredditImageUrl); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(subredditImageUrl); + await _mockSubredditImageExtractor.Received(1).GetSubredditImageUrlAsync("programming"); + } + 
+ [Fact] + public async Task ExtractAsync_PostWithWhitespaceImage_UsesSubredditFallbackImage() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var subredditImageUrl = "https://styles.redditmedia.com/programming-icon.png"; + var testPost = CreateTestRedditPost("abc123", "Test Title", " "); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + _mockSubredditImageExtractor.GetSubredditImageUrlAsync("programming").Returns(subredditImageUrl); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(subredditImageUrl); + await _mockSubredditImageExtractor.Received(1).GetSubredditImageUrlAsync("programming"); + } + + [Fact] + public async Task ExtractAsync_ValidUrl_ReturnsCorrectTitle() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var expectedTitle = "How to write better code"; + var testPost = CreateTestRedditPost("abc123", expectedTitle, "https://example.com/image.jpg"); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.Title.ShouldBe(expectedTitle); + } + + [Fact] + public async Task ExtractAsync_ValidUrl_ReturnsSerializedPostAsContent() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var testPost = CreateTestRedditPost("abc123", "Test Title", "https://example.com/image.jpg"); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + var deserializedPost = JsonSerializer.Deserialize(result.Content); + deserializedPost.ShouldNotBeNull(); + deserializedPost.Post.Id.ShouldBe("abc123"); + deserializedPost.Post.Title.ShouldBe("Test Title"); + } + + [Theory] + [InlineData("not-a-url")] + [InlineData("")] + [InlineData(" ")] + public async Task ExtractAsync_InvalidUrl_ThrowsInvalidOperationException(string 
invalidUrl) + { + // Act & Assert + await Should.ThrowAsync(() => _extractor.ExtractAsync(invalidUrl)); + } + + [Theory] + [InlineData("https://reddit.com")] + [InlineData("https://reddit.com/r/programming")] + [InlineData("https://reddit.com/r/programming/comments")] + [InlineData("https://reddit.com/r/programming/posts/abc123/title")] + [InlineData("https://reddit.com/r/programming/comments/abc123/title/extra")] + public async Task ExtractAsync_UnsupportedUrl_ThrowsInvalidOperationException(string unsupportedUrl) + { + // Act & Assert + await Should.ThrowAsync(() => _extractor.ExtractAsync(unsupportedUrl)); + } + + [Fact] + public async Task ExtractAsync_InvalidUrl_ThrowsWithMeaningfulErrorMessage() + { + // Arrange + var invalidUrl = "not-a-valid-url"; + + // Act & Assert + var exception = await Should.ThrowAsync(() => _extractor.ExtractAsync(invalidUrl)); + exception.Message.ShouldContain("Invalid URL format"); + exception.Message.ShouldContain(invalidUrl); + exception.Message.ShouldContain("valid absolute URI"); + } + + [Fact] + public async Task ExtractAsync_UnsupportedUrl_ThrowsWithMeaningfulErrorMessage() + { + // Arrange + var unsupportedUrl = "https://reddit.com/r/programming"; + + // Act & Assert + var exception = await Should.ThrowAsync(() => _extractor.ExtractAsync(unsupportedUrl)); + exception.Message.ShouldContain("Unsupported Reddit URL format"); + exception.Message.ShouldContain(unsupportedUrl); + exception.Message.ShouldContain("Expected format"); + exception.Message.ShouldContain("reddit-domain"); + } + + [Fact] + public async Task ExtractAsync_UnsupportedHost_ThrowsWithMeaningfulErrorMessage() + { + // Arrange + var unsupportedHostUrl = "https://not-reddit.com/r/programming/comments/abc123/title"; + + // Act & Assert + var exception = await Should.ThrowAsync(() => _extractor.ExtractAsync(unsupportedHostUrl)); + exception.Message.ShouldContain("Unsupported domain"); + exception.Message.ShouldContain("not-reddit.com"); + 
exception.Message.ShouldContain("Supported domains"); + } + + [Theory] + [InlineData("https://www.reddit.com/r/programming/comments/abc123/title", "programming")] + [InlineData("https://www.reddit.com/r/programming/comments/abc123", "programming")] + [InlineData("https://www.reddit.com/r/funny/comments/def456/joke", "funny")] + [InlineData("https://www.reddit.com/r/funny/comments/def456", "funny")] + [InlineData("https://www.reddit.com/r/todayilearned/comments/ghi789/fact", "todayilearned")] + [InlineData("https://www.reddit.com/r/todayilearned/comments/ghi789", "todayilearned")] + [InlineData("https://www.reddit.com/r/AskReddit/comments/jkl012/question", "AskReddit")] + [InlineData("https://www.reddit.com/r/AskReddit/comments/jkl012", "AskReddit")] + public async Task ExtractAsync_DifferentSubreddits_CallsSubredditImageExtractorWithCorrectName(string url, string expectedSubreddit) + { + // Arrange + var testPost = CreateTestRedditPost("test123", "Test Title", null); + _mockRedditPostClient.GetPost(Arg.Any()).Returns(testPost); + _mockSubredditImageExtractor.GetSubredditImageUrlAsync(expectedSubreddit) + .Returns($"https://styles.redditmedia.com/{expectedSubreddit}-icon.png"); + + // Act + await _extractor.ExtractAsync(url); + + // Assert + await _mockSubredditImageExtractor.Received(1).GetSubredditImageUrlAsync(expectedSubreddit); + } + + [Theory] + [InlineData("https://i.redd.it/gallery-image.jpg")] + [InlineData("https://preview.redd.it/preview-image.png")] + [InlineData("https://external-preview.redd.it/external-image.gif")] + [InlineData("https://imgur.com/direct-link.webp")] + [InlineData("https://reddit.com/thumbnail.bmp")] + public async Task ExtractAsync_PostWithVariousImageUrls_DoesNotUseFallback(string imageUrl) + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var testPost = CreateTestRedditPost("abc123", "Test Title", imageUrl); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + + // Act + var 
result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(imageUrl); + await _mockSubredditImageExtractor.DidNotReceive().GetSubredditImageUrlAsync(Arg.Any()); + } + + [Fact] + public async Task ExtractAsync_SubredditImageExtractorThrows_PropagatesException() + { + // Arrange + var url = "https://www.reddit.com/r/programming/comments/abc123/title"; + var testPost = CreateTestRedditPost("abc123", "Test Title", null); + _mockRedditPostClient.GetPost("abc123").Returns(testPost); + _mockSubredditImageExtractor.GetSubredditImageUrlAsync("programming") + .Returns(Task.FromException(new HttpRequestException("Network error"))); + + // Act & Assert + await Should.ThrowAsync(() => _extractor.ExtractAsync(url)); + } + + private static RedditPost CreateTestRedditPost(string id, string title, string? imageUrl) => new() + { + Post = new RedditPostContent + { + Id = id, + Title = title, + Author = "testuser", + Subreddit = "testsubreddit", + Score = 100, + Content = "Test post content", + CreatedUtc = DateTime.UtcNow, + ImageUrl = imageUrl + }, + Comments = + [ + new() { + Id = "comment1", + Author = "commenter1", + Score = 50, + Content = "Test comment", + CreatedUtc = DateTime.UtcNow, + Replies = [] + } + ] + }; + } +} \ No newline at end of file diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/SubRedditExtractorTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/SubRedditExtractorTests.cs new file mode 100644 index 0000000..80c4b2a --- /dev/null +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentExtractors/Reddit/SubRedditExtractorTests.cs @@ -0,0 +1,615 @@ +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit; +using Elzik.Breef.Infrastructure.ContentExtractors.Reddit.Client; +using Microsoft.Extensions.Options; +using NSubstitute; +using Shouldly; +using System.Text.Json; + +namespace Elzik.Breef.Infrastructure.Tests.Unit.ContentExtractors.Reddit +{ + public class 
SubredditExtractorTests + { + private const string FallbackImageUrl = "https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Lockup_Logo.svg"; + + private readonly ISubredditClient _mockSubredditClient; + private readonly IHttpClientFactory _mockHttpClientFactory; + private readonly IOptions _mockRedditOptions; + private readonly SubredditContentExtractor _extractor; + + public SubredditExtractorTests() + { + _mockSubredditClient = Substitute.For(); + _mockSubredditClient.GetNewInSubreddit(Arg.Any()) + .Returns(new NewInSubreddit { Posts = new List() }); + + _mockHttpClientFactory = Substitute.For(); + var mockHandler = new MockHttpMessageHandler(JsonSerializer.Serialize(new { data = new { } }), System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + _mockRedditOptions = Substitute.For>(); + _mockRedditOptions.Value.Returns(new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://reddit.com"], + FallbackImageUrl = FallbackImageUrl + }); + + _extractor = new SubredditContentExtractor(_mockSubredditClient, _mockHttpClientFactory, _mockRedditOptions); + } + + [Theory] + [InlineData("https://reddit.com/r/testsubreddit/")] + [InlineData("https://reddit.com/r/testsubreddit")] + [InlineData("hTTpS://rEDdiT.cOm/R/tEsTsUbReDdIt/")] + [InlineData("https://www.reddit.com/r/testsubreddit/")] + public void CanHandle_ValidSubredditUrl_ReturnsTrue(string url) + { + // Act + var canHandle = _extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeTrue(); + } + + [Theory] + [InlineData("not-a-url")] + [InlineData("https://reddit.com")] + [InlineData("https://reddit.com/r")] + [InlineData("https://reddit.com/r/testsubreddit/more")] + [InlineData("https://not-reddit.com/r/testsubreddit/")] + [InlineData("https://www2.reddit.com/r/testsubreddit/")] + public void CanHandle_InvalidSubredditUrl_ReturnsFalse(string url) + { + // 
Act + var canHandle = _extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeFalse(); + } + + [Theory] + [InlineData("https://custom.reddit.com/r/testsubreddit/")] + [InlineData("https://alt.reddit.instance.com/r/testsubreddit/")] + public void CanHandle_CustomRedditInstance_ReturnsTrue(string url) + { + // Arrange + var customOptions = new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://reddit.com", "https://custom.reddit.com", "https://alt.reddit.instance.com"], + FallbackImageUrl = FallbackImageUrl + }; + _mockRedditOptions.Value.Returns(customOptions); + var extractor = new SubredditContentExtractor(_mockSubredditClient, _mockHttpClientFactory, _mockRedditOptions); + + // Act + var canHandle = extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeTrue(); + } + + [Theory] + [InlineData("https://unknown.reddit.com/r/testsubreddit/")] + [InlineData("https://www.unknown.reddit.com/r/testsubreddit/")] + public void CanHandle_UnknownRedditInstance_ReturnsFalse(string url) + { + // Arrange + var customOptions = new RedditOptions + { + DefaultBaseAddress = "https://www.reddit.com", + AdditionalBaseAddresses = ["https://reddit.com", "https://custom.reddit.com"], + FallbackImageUrl = FallbackImageUrl + }; + _mockRedditOptions.Value.Returns(customOptions); + var extractor = new SubredditContentExtractor(_mockSubredditClient, _mockHttpClientFactory, _mockRedditOptions); + + // Act + var canHandle = extractor.CanHandle(url); + + // Assert + canHandle.ShouldBeFalse(); + } + + [Theory] + [InlineData("icon_img")] + [InlineData("community_icon")] + [InlineData("banner_background_image")] + [InlineData("banner_img")] + [InlineData("mobile_banner_image")] + public async Task ExtractAsync_ImageKeyExistsAndIsAccessible_ReturnsImageUrl(string imageKey) + { + // Arrange + var url = $"https://www.reddit.com/r/subreddit"; + var imageUrl = $"https://img.reddit.com/{imageKey}.png"; + var json = 
CreateJsonWithImageKey(imageKey, imageUrl); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(imageUrl); + } + + [Theory] + [InlineData("icon_img")] + [InlineData("community_icon")] + [InlineData("banner_background_image")] + [InlineData("banner_img")] + [InlineData("mobile_banner_image")] + public async Task ExtractAsync_TryGetReturnsFalse_UsesFallbackImageUrl(string imageKey) + { + // Arrange + var url = $"https://www.reddit.com/r/subreddit"; + var imageUrl = $"https://img.reddit.com/{imageKey}.png"; + var json = CreateJsonWithImageKey(imageKey, imageUrl); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK, imageUrl, System.Net.HttpStatusCode.NotFound); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(FallbackImageUrl); + } + + [Fact] + public async Task ExtractAsync_NoImageKeysExist_UsesFallbackImageUrl() + { + // Arrange + var url = $"https://www.reddit.com/r/subreddit"; + var json = JsonSerializer.Serialize(new { data = new { } }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(FallbackImageUrl); + } + + [Fact] + public async Task ExtractAsync_AvailableContent_ReturnsExpectedTitle() + { + // Arrange + var url = $"https://www.reddit.com/r/subreddit"; + var json = JsonSerializer.Serialize(new { data = new { } 
}); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.Title.ShouldBe($"New in r/subreddit"); + } + + [Fact] + public async Task ExtractAsync_AvailableContent_ReturnsExpectedContent() + { + // Arrange + var url = $"https://www.reddit.com/r/subreddit"; + var samplePost = new RedditPost + { + Post = new RedditPostContent + { + Id = "abc123", + Title = "Test Post", + Author = "testuser", + Subreddit = "subreddit", + Score = 100, + Content = "Test content", + CreatedUtc = new DateTime(2024, 1, 1, 0, 0, 0, DateTimeKind.Utc) + }, + Comments = new List() + }; + + var newInSubreddit = new NewInSubreddit + { + Posts = new List { samplePost } + }; + var expectedJson = JsonSerializer.Serialize(newInSubreddit); + + _mockSubredditClient.GetNewInSubreddit("subreddit").Returns(newInSubreddit); + + var mockHandler = new MockHttpMessageHandler(JsonSerializer.Serialize(new { data = new { } }), System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.Content.ShouldBe(expectedJson); + + var deserializedContent = JsonSerializer.Deserialize(result.Content); + deserializedContent.ShouldNotBeNull(); + deserializedContent.Posts.Count.ShouldBe(1); + deserializedContent.Posts[0].Post.Id.ShouldBe("abc123"); + deserializedContent.Posts[0].Post.Title.ShouldBe("Test Post"); + } + + [Theory] + [InlineData("https://www.reddit.com/r/testsubreddit")] + [InlineData("https://www.reddit.com/r/testsubreddit/")] + public async Task ExtractAsync_ValidUrl_CallsSubredditClientWithCorrectName(string subredditUrl) + { + // Arrange + var mockHandler = new 
MockHttpMessageHandler(JsonSerializer.Serialize(new { data = new { } }), System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + await _extractor.ExtractAsync(subredditUrl); + + // Assert + await _mockSubredditClient.Received(1).GetNewInSubreddit("testsubreddit"); + } + + [Theory] + [InlineData("icon_img")] + [InlineData("community_icon")] + [InlineData("banner_background_image")] + [InlineData("banner_img")] + [InlineData("mobile_banner_image")] + public async Task GetSubredditImageUrlAsync_ImageKeyExistsAndIsAccessible_ReturnsImageUrl(string imageKey) + { + // Arrange + var subredditName = "programming"; + var imageUrl = $"https://img.reddit.com/{imageKey}.png"; + var json = CreateJsonWithImageKey(imageKey, imageUrl); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(imageUrl); + } + + [Theory] + [InlineData("programming")] + [InlineData("learnprogramming")] + [InlineData("AskReddit")] + [InlineData("funny")] + public async Task GetSubredditImageUrlAsync_ValidSubredditName_CallsCorrectAboutUrl(string subredditName) + { + // Arrange + var expectedUrl = $"https://www.reddit.com/r/{subredditName}/about.json"; + var json = JsonSerializer.Serialize(new { data = new { } }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + // Since we're using MockHttpMessageHandler, we can't easily verify the exact URL called + // The test passes if no 
exception is thrown and the method completes successfully + } + + [Fact] + public async Task GetSubredditImageUrlAsync_NoImageKeysExist_ReturnsFallbackImageUrl() + { + // Arrange + var subredditName = "programming"; + var json = JsonSerializer.Serialize(new { data = new { } }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(FallbackImageUrl); + } + + [Fact] + public async Task GetSubredditImageUrlAsync_ImageExistsButNotAccessible_ReturnsFallbackImageUrl() + { + // Arrange + var subredditName = "programming"; + var imageUrl = "https://img.reddit.com/icon.png"; + var json = CreateJsonWithImageKey("icon_img", imageUrl); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK, imageUrl, System.Net.HttpStatusCode.NotFound); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(FallbackImageUrl); + } + + [Fact] + public async Task GetSubredditImageUrlAsync_MultipleImageKeys_ReturnsFirstAccessibleImage() + { + // Arrange + var subredditName = "programming"; + var bannerImageUrl = "https://img.reddit.com/banner.png"; + var iconImageUrl = "https://img.reddit.com/icon.png"; + + var json = JsonSerializer.Serialize(new + { + data = new Dictionary + { + { "banner_background_image", bannerImageUrl }, + { "icon_img", iconImageUrl } + } + }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await 
_extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(bannerImageUrl); + } + + [Fact] + public async Task GetSubredditImageUrlAsync_FirstImageNotAccessible_ReturnsSecondImage() + { + // Arrange + var subredditName = "programming"; + var bannerImageUrl = "https://img.reddit.com/banner.png"; + var iconImageUrl = "https://img.reddit.com/icon.png"; + + var json = JsonSerializer.Serialize(new + { + data = new Dictionary + { + { "banner_background_image", bannerImageUrl }, + { "icon_img", iconImageUrl } + } + }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK, bannerImageUrl, System.Net.HttpStatusCode.NotFound); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(iconImageUrl); + } + + [Fact] + public async Task GetSubredditImageUrlAsync_HttpDownloaderThrows_PropagatesException() + { + // Arrange + var subredditName = "programming"; + var mockHandler = new ThrowingMockHttpMessageHandler(new HttpRequestException("Network error")); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var test = await Should.ThrowAsync(() + => _extractor.GetSubredditImageUrlAsync(subredditName)); + + // Assert + test.Message.ShouldBe("Network error"); + } + + [Theory] + [InlineData("icon_img", null)] + [InlineData("community_icon", "")] + [InlineData("banner_background_image", " ")] + [InlineData("banner_img", "\t")] + [InlineData("mobile_banner_image", "\n")] + [InlineData("icon_img", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==")] + [InlineData("community_icon", "ftp://example.com/image.png")] + [InlineData("banner_background_image", "file:///c:/images/banner.png")] + 
[InlineData("banner_img", "javascript:alert('xss')")] + [InlineData("mobile_banner_image", "mailto:test@example.com")] + public async Task GetSubredditImageUrlAsync_ImageUrlIsUnsuitable_UsesFallbackImageUrl(string imageKey, string? imageUrl) + { + // Arrange + var subredditName = "programming"; + var json = CreateJsonWithImageKey(imageKey, imageUrl); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(FallbackImageUrl); + } + + [Theory] + [InlineData("icon_img", "not-a-valid-url")] + [InlineData("community_icon", "://invalid-url")] + [InlineData("banner_background_image", "http://")] + [InlineData("banner_img", "https://")] + public async Task GetSubredditImageUrlAsync_ImageUrlIsInvalidUri_UsesFallbackImageUrl(string imageKey, string imageUrl) + { + // Arrange + var subredditName = "programming"; + var json = CreateJsonWithImageKey(imageKey, imageUrl); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(FallbackImageUrl); + } + + [Fact] + public async Task GetSubredditImageUrlAsync_MixedValidAndInvalidUrls_UsesFirstValidHttpUrl() + { + // Arrange + var subredditName = "programming"; + var validImageUrl = "https://img.reddit.com/valid-icon.png"; + + var json = JsonSerializer.Serialize(new + { + data = new Dictionary + { + { "banner_background_image", "data:image/png;base64,invalid" }, + { "banner_img", "" }, + { "mobile_banner_image", " " }, + { "icon_img", validImageUrl }, + { "community_icon", 
"https://img.reddit.com/another-icon.png" } + } + }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.GetSubredditImageUrlAsync(subredditName); + + // Assert + result.ShouldBe(validImageUrl); + } + + [Fact] + public async Task ExtractAsync_UrlWithQueryString_ExtractsCorrectSubredditName() + { + // Arrange + var json = JsonSerializer.Serialize(new { data = new { } }); + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act - URL with both query string and fragment + var result = await _extractor.ExtractAsync("https://www.reddit.com/r/dotnet/?utm_source=share#section"); + + // Assert + result.Title.ShouldBe("New in r/dotnet"); + await _mockSubredditClient.Received(1).GetNewInSubreddit("dotnet"); + } + + [Theory] + [InlineData("null")] + [InlineData("empty")] + [InlineData("whitespace")] + [InlineData("non-http")] + [InlineData("invalid-uri")] + public async Task ExtractAsync_ImageUrlIsInvalid_UsesFallbackImageUrl(string invalidType) + { + // Arrange + var url = "https://www.reddit.com/r/subreddit"; + string? 
imageUrl = invalidType switch + { + "null" => null, + "empty" => "", + "whitespace" => " ", + "non-http" => "data:image/png;base64,invalid", + "invalid-uri" => "not-a-valid-url", + _ => throw new ArgumentException($"Unknown invalid type: {invalidType}") + }; + + var json = CreateJsonWithImageKey("icon_img", imageUrl); + + _mockSubredditClient.GetNewInSubreddit("subreddit") + .Returns(new NewInSubreddit { Posts = new List() }); + + var mockHandler = new MockHttpMessageHandler(json, System.Net.HttpStatusCode.OK); + var httpClient = new HttpClient(mockHandler); + _mockHttpClientFactory.CreateClient("BreefDownloader").Returns(httpClient); + + // Act + var result = await _extractor.ExtractAsync(url); + + // Assert + result.PreviewImageUrl.ShouldBe(FallbackImageUrl); + } + + private static string CreateJsonWithImageKey(string key, string? value) + { + var data = new Dictionary(); + if (value != null) + { + data[key] = value; + } + else + { + data[key] = null; + } + + return JsonSerializer.Serialize(new { data }); + } + + private class MockHttpMessageHandler : HttpMessageHandler + { + private readonly string _defaultResponse; + private readonly System.Net.HttpStatusCode _defaultStatusCode; + private readonly string? _failUrl; + private readonly System.Net.HttpStatusCode _failStatusCode; + + public MockHttpMessageHandler(string defaultResponse, System.Net.HttpStatusCode defaultStatusCode, string? 
failUrl = null, System.Net.HttpStatusCode failStatusCode = System.Net.HttpStatusCode.NotFound) + { + _defaultResponse = defaultResponse; + _defaultStatusCode = defaultStatusCode; + _failUrl = failUrl; + _failStatusCode = failStatusCode; + } + + protected override Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + if (_failUrl != null && request.RequestUri?.AbsoluteUri == _failUrl) + { + return Task.FromResult(new HttpResponseMessage + { + StatusCode = _failStatusCode, + Content = new StringContent("") + }); + } + + return Task.FromResult(new HttpResponseMessage + { + StatusCode = _defaultStatusCode, + Content = new StringContent(_defaultResponse) + }); + } + } + + private class ThrowingMockHttpMessageHandler : HttpMessageHandler + { + private readonly Exception _exception; + + public ThrowingMockHttpMessageHandler(Exception exception) + { + _exception = exception; + } + + protected override Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + throw _exception; + } + } + } +} diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentSummariserTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentSummariserTests.cs index 72acbd3..708a5ec 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentSummariserTests.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/ContentSummariserTests.cs @@ -62,7 +62,7 @@ public async Task SummariseAsync_ValidContent_ReturnsSummary() public async Task SummariseAsync_ValidContent_ProvidesModelInstructions() { // Act - var result = await _contentSummariser.SummariseAsync(_testContent); + _ = await _contentSummariser.SummariseAsync(_testContent); // Assert var systemPrompt = @$" diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/Elzik.Breef.Infrastructure.Tests.Unit.csproj b/tests/Elzik.Breef.Infrastructure.Tests.Unit/Elzik.Breef.Infrastructure.Tests.Unit.csproj index 3f1c0af..eab1d21 100644 --- 
a/tests/Elzik.Breef.Infrastructure.Tests.Unit/Elzik.Breef.Infrastructure.Tests.Unit.csproj +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/Elzik.Breef.Infrastructure.Tests.Unit.csproj @@ -1,4 +1,4 @@ - + net8.0 @@ -22,7 +22,15 @@ + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + all diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagBreefPublisherTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagBreefPublisherTests.cs index a1fb552..2783abd 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagBreefPublisherTests.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagBreefPublisherTests.cs @@ -26,12 +26,6 @@ public async Task Publish_WhenCalled_ShouldReturnPublishedBreef() "test-title", "test-content", "https://wallabag.elzik.co.uk/img/logo-wallabag.svg"); - var wallabagEntryCreateRequest = new WallabagEntryCreateRequest - { - Content = "test-content", - Url = "https://test.com", - Tags = "breef" - }; var wallabagEntryID = 123; var wallabagEntry = new WallabagEntry { diff --git a/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagDateTimeConverterTests.cs b/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagDateTimeConverterTests.cs index 1c5828e..ffd92b2 100644 --- a/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagDateTimeConverterTests.cs +++ b/tests/Elzik.Breef.Infrastructure.Tests.Unit/Wallabag/WallabagDateTimeConverterTests.cs @@ -2,68 +2,67 @@ using Shouldly; using System.Text.Json; -namespace Elzik.Breef.Tests.Infrastructure.Wallabag +namespace Elzik.Breef.Infrastructure.Tests.Unit.Wallabag; + +public class WallabagDateTimeConverterTests { - public class WallabagDateTimeConverterTests + private readonly WallabagDateTimeConverter _wallabagDateTimeConverter = new(); + + [Fact] + public void Read_ValidDate_ReturnsExpectedDate() { - private 
readonly WallabagDateTimeConverter _wallabagDateTimeConverter = new(); + // Arrange + var json = "\"2023-10-01T12:34:56Z\""; + var reader = new Utf8JsonReader(System.Text.Encoding.UTF8.GetBytes(json)); + reader.Read(); - [Fact] - public void Read_ValidDate_ReturnsExpectedDate() - { - // Arrange - var json = "\"2023-10-01T12:34:56Z\""; - var reader = new Utf8JsonReader(System.Text.Encoding.UTF8.GetBytes(json)); - reader.Read(); + // Act + var result = _wallabagDateTimeConverter.Read(ref reader, typeof(DateTime), new JsonSerializerOptions()); - // Act - var result = _wallabagDateTimeConverter.Read(ref reader, typeof(DateTime), new JsonSerializerOptions()); + // Assert + result.ToUniversalTime().ShouldBe(new DateTime(2023, 10, 1, 12, 34, 56, DateTimeKind.Utc)); + } - // Assert - result.ToUniversalTime().ShouldBe(new DateTime(2023, 10, 1, 12, 34, 56, DateTimeKind.Utc)); - } + [Theory] + [InlineData("12345", "Expected string token.")] + [InlineData("\"invalid-date\"", "Unable to convert \"invalid-date\" to a Wallabag DateTime.")] + public void Read_InvalidInput_Throws(string testJson, string expectedMessage) + { + // Arrange + var testReader = new Utf8JsonReader(System.Text.Encoding.UTF8.GetBytes(testJson)); + testReader.Read(); - [Theory] - [InlineData("12345", "Expected string token.")] - [InlineData("\"invalid-date\"", "Unable to convert \"invalid-date\" to a Wallabag DateTime.")] - public void Read_InvalidInput_Throws(string testJson, string expectedMessage) + // Act + JsonException ex; + try { - // Arrange - var testReader = new Utf8JsonReader(System.Text.Encoding.UTF8.GetBytes(testJson)); - testReader.Read(); - - // Act - JsonException ex; - try - { - _wallabagDateTimeConverter.Read(ref testReader, typeof(DateTime), new JsonSerializerOptions()); - throw new Exception("Expected JsonException was not thrown."); - } - catch (JsonException e) - { - ex = e; - } - - // Assert - ex.Message.ShouldBe(expectedMessage); + _wallabagDateTimeConverter.Read(ref testReader, 
typeof(DateTime), new JsonSerializerOptions()); + throw new Exception("Expected JsonException was not thrown."); } - - [Fact] - public void Write_ShouldConvertDateTimeToString() + catch (JsonException e) { - // Arrange - var testDateTime = new DateTime(2023, 10, 1, 12, 34, 56, DateTimeKind.Utc); - var testOptions = new JsonSerializerOptions { Converters = { _wallabagDateTimeConverter } }; - var testBuffer = new System.Buffers.ArrayBufferWriter(); - var testWriter = new Utf8JsonWriter(testBuffer); + ex = e; + } - // Act - _wallabagDateTimeConverter.Write(testWriter, testDateTime, testOptions); + // Assert + ex.Message.ShouldBe(expectedMessage); + } - // Assert - testWriter.Flush(); - var writtenJson = System.Text.Encoding.UTF8.GetString(testBuffer.WrittenMemory.ToArray()); - writtenJson.ShouldBe("\"2023-10-01T12:34:56Z\""); - } + [Fact] + public void Write_ShouldConvertDateTimeToString() + { + // Arrange + var testDateTime = new DateTime(2023, 10, 1, 12, 34, 56, DateTimeKind.Utc); + var testOptions = new JsonSerializerOptions { Converters = { _wallabagDateTimeConverter } }; + var testBuffer = new System.Buffers.ArrayBufferWriter(); + var testWriter = new Utf8JsonWriter(testBuffer); + + // Act + _wallabagDateTimeConverter.Write(testWriter, testDateTime, testOptions); + + // Assert + testWriter.Flush(); + var writtenJson = System.Text.Encoding.UTF8.GetString(testBuffer.WrittenMemory.ToArray()); + writtenJson.ShouldBe("\"2023-10-01T12:34:56Z\""); } }