From b39e909895655632c3207b4fe7db91ad99733f31 Mon Sep 17 00:00:00 2001 From: Marcos Cordeiro Date: Thu, 12 Oct 2023 21:03:43 -0300 Subject: [PATCH] More improvements to episode matching Uses F23.StringSimilarity to better determine string similarities --- Wasari.Crunchyroll/EpisodeExtensions.cs | 41 +++++++++---------- Wasari.Crunchyroll/Wasari.Crunchyroll.csproj | 1 + Wasari.Tests/EpisodeMatchesTest.cs | 10 ++--- Wasari.Tvdb.Abstractions/WasariTvdbEpisode.cs | 2 +- .../Services/TvdbEpisodesService.cs | 26 +++++++++--- Wasari.Tvdb/ITvdbApi.cs | 3 ++ Wasari.Tvdb/Models/TvdbEpisode.cs | 2 +- 7 files changed, 52 insertions(+), 33 deletions(-) diff --git a/Wasari.Crunchyroll/EpisodeExtensions.cs b/Wasari.Crunchyroll/EpisodeExtensions.cs index e88fa2b..207a30c 100644 --- a/Wasari.Crunchyroll/EpisodeExtensions.cs +++ b/Wasari.Crunchyroll/EpisodeExtensions.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using System.Text.RegularExpressions; +using F23.StringSimilarity; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; @@ -13,6 +14,8 @@ namespace Wasari.Crunchyroll; public static partial class EpisodeExtensions { + private static readonly NormalizedLevenshtein Levenshtein = new NormalizedLevenshtein(); + [GeneratedRegex("[a-zA-Z0-9 ]+")] private static partial Regex EpisodeTitleNormalizeRegex(); @@ -156,28 +159,17 @@ private static WasariTvdbEpisode FindEpisodeByNormalizedWordProximity(IEnumerabl var episodeName = episode.Title .ToLowerInvariant() .NormalizeUsingRegex(); - - var unmatchedEpisodeTitleWords = episodeName.Split(' '); - - var possibleEpisodes = wasariApiEpisodes.Where(o => !o.Matched) - .Select(wasariEpisode => + + var possibleEpisodes = wasariApiEpisodes + .Where(o => !o.Matched) + .Select(i => new { - var wasariEpisodeTitleWords = wasariEpisode.Name + Episode = i, + Distance = Levenshtein.Distance(i.Name .ToLowerInvariant() - .NormalizeUsingRegex() - .Split(' '); - - var matchedCount = unmatchedEpisodeTitleWords.Intersect(wasariEpisodeTitleWords).Count(); - return new - { - Episode = wasariEpisode, - EpisodeTitle = wasariEpisode.Name, - UnmatchedEpisodeTitle = episode.Title, - MatchesTitleWords = matchedCount, - MatchPercentage = (double)matchedCount / wasariEpisodeTitleWords.Length - }; + .NormalizeUsingRegex(), episodeName) }) - .OrderByDescending(i => i.MatchPercentage) + .OrderBy(i => i.Distance) .Take(2) .ToList(); @@ -188,11 +180,18 @@ private static WasariTvdbEpisode FindEpisodeByNormalizedWordProximity(IEnumerabl .SingleOrDefault(); } - var delta = possibleEpisodes[0].MatchPercentage - possibleEpisodes[1].MatchPercentage; - if (delta > 0.1) + var delta = possibleEpisodes[1].Distance - possibleEpisodes[0].Distance; + if (possibleEpisodes[0].Distance < 0.5 && delta > 0.4) { return possibleEpisodes[0].Episode; } + + if(possibleEpisodes + .Where(i => i.Episode.SeasonNumber == episode.SeasonNumber && (i.Episode.Number == episode.EpisodeNumber || i.Episode.CalculatedAbsoluteNumber == episode.SequenceNumber)) + .SingleOrDefaultIfMultiple() is {} ep) + { + return ep.Episode; + } return default; } diff --git a/Wasari.Crunchyroll/Wasari.Crunchyroll.csproj b/Wasari.Crunchyroll/Wasari.Crunchyroll.csproj index 42cde66..e321c87 100644 --- a/Wasari.Crunchyroll/Wasari.Crunchyroll.csproj +++ b/Wasari.Crunchyroll/Wasari.Crunchyroll.csproj @@ -5,6 +5,7 @@ + diff --git a/Wasari.Tests/EpisodeMatchesTest.cs b/Wasari.Tests/EpisodeMatchesTest.cs index 2fae37e..d89d654 100644 --- a/Wasari.Tests/EpisodeMatchesTest.cs +++ b/Wasari.Tests/EpisodeMatchesTest.cs @@ -47,15 +47,15 @@ private static IServiceProvider BuildServiceProvider() )] [DataRow( "GRZXQJJ8Y" // The Ancient Magus' Bride - , 4 + , 6 )] [DataRow( "GYEXQKJG6" // Dr. STONE - , 1 + , 2 )] [DataRow( "GRDV0019R" // Jujustu Kaisen - , 2 + , 0 )] [DataRow( "G4PH0WXVJ" // SPY x FAMILY @@ -73,10 +73,10 @@ public async Task MatchAllEpisodes(string seriesId, int expectedNonEnrichedCount Assert.AreEqual(episodes.Length, enrichedEpisodes.Length); - var allEpisodesWereEnriched = enrichedEpisodes + var allEpisodesWereNotEnriched = enrichedEpisodes .Where(i => !i.WasEnriched) .ToList(); - Assert.AreEqual(expectedNonEnrichedCount, allEpisodesWereEnriched.Count, "Not all expected episodes were enriched"); + Assert.AreEqual(expectedNonEnrichedCount, allEpisodesWereNotEnriched.Count, "Not all expected episodes were enriched"); } } \ No newline at end of file diff --git a/Wasari.Tvdb.Abstractions/WasariTvdbEpisode.cs b/Wasari.Tvdb.Abstractions/WasariTvdbEpisode.cs index a6841a8..14261ed 100644 --- a/Wasari.Tvdb.Abstractions/WasariTvdbEpisode.cs +++ b/Wasari.Tvdb.Abstractions/WasariTvdbEpisode.cs @@ -2,7 +2,7 @@ namespace Wasari.Tvdb.Abstractions; -public record WasariTvdbEpisode(string Name, int? SeasonNumber, int? Number, bool IsMovie, string? Prefix) +public record WasariTvdbEpisode(int Id, string Name, int? SeasonNumber, int? Number, bool IsMovie, string? Prefix, string SeriesId, int? CalculatedAbsoluteNumber) { [JsonIgnore] public bool Matched { get; set; } diff --git a/Wasari.Tvdb.Api/Services/TvdbEpisodesService.cs b/Wasari.Tvdb.Api/Services/TvdbEpisodesService.cs index f15f726..7d33d8e 100644 --- a/Wasari.Tvdb.Api/Services/TvdbEpisodesService.cs +++ b/Wasari.Tvdb.Api/Services/TvdbEpisodesService.cs @@ -40,15 +40,31 @@ public async ValueTask GetEpisodes(string query) }); var series = tvdbSearchResponseSeries.Single(); + var seriesWithEpisodes = await TvdbApi.GetSeriesAsync(series.TvdbId); + var currentEpiosdeNumber = 1; + return Results.Ok(seriesWithEpisodes.Data.Episodes .Where(i => !string.IsNullOrEmpty(i.Name)) - .Select(i => new WasariTvdbEpisode(i.Name, i.SeasonNumber, i.Number, i.IsMovie switch + .OrderBy(i => i.SeasonNumber) + .ThenBy(i => i.Number) + .Select(ep => { - 0 => false, - 1 => true, - _ => throw new ArgumentException("IsMovie flag is not 0 or 1") - }, i is { SeasonNumber: not null, Number: not null } ? $"S{i.SeasonNumber:00}E{i.Number:00}" : null))); + var episode = new WasariTvdbEpisode(ep.Id, ep.Name, ep.SeasonNumber, ep.Number, ep.IsMovie switch + { + 0 => false, + 1 => true, + _ => throw new ArgumentException("IsMovie flag is not 0 or 1") + }, ep is { SeasonNumber: not null, Number: not null } ? $"S{ep.SeasonNumber:00}E{ep.Number:00}" : null, + series.Id, + ep.SeasonNumber > 0 ? currentEpiosdeNumber : null); + + if(ep.SeasonNumber > 0) + currentEpiosdeNumber++; + + return episode; + }) + ); } } \ No newline at end of file diff --git a/Wasari.Tvdb/ITvdbApi.cs b/Wasari.Tvdb/ITvdbApi.cs index e38cdb1..d8c9312 100644 --- a/Wasari.Tvdb/ITvdbApi.cs +++ b/Wasari.Tvdb/ITvdbApi.cs @@ -10,4 +10,7 @@ public interface ITvdbApi [Get("/series/{id}/episodes/{seasonType}/{lang}")] Task> GetSeriesAsync(string id, string seasonType = "default", string lang = "eng", int page = 0); + + [Get("/series/{id}/episodes/{seasonType}/{lang}")] + Task GetSeriesRawAsync(string id, string seasonType = "default", string lang = "eng", int page = 0); } \ No newline at end of file diff --git a/Wasari.Tvdb/Models/TvdbEpisode.cs b/Wasari.Tvdb/Models/TvdbEpisode.cs index e2bdde9..e8d992f 100644 --- a/Wasari.Tvdb/Models/TvdbEpisode.cs +++ b/Wasari.Tvdb/Models/TvdbEpisode.cs @@ -3,7 +3,7 @@ namespace Wasari.Tvdb.Models; public record TvdbEpisode( - [property: JsonPropertyName("id")] int? Id, + [property: JsonPropertyName("id")] int Id, [property: JsonPropertyName("seriesId")] int? SeriesId, [property: JsonPropertyName("name")] string Name, [property: JsonPropertyName("aired")] string Aired,