Skip to content
This repository has been archived by the owner on Apr 13, 2024. It is now read-only.

Commit

Permalink
More improvements to episode matching
Browse files: browse the repository at this point in the history
Uses F23.StringSimilarity to better determine string similarities
  • Loading branch information
redbaty committed Oct 13, 2023
1 parent cdec05f commit b39e909
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 33 deletions.
41 changes: 20 additions & 21 deletions Wasari.Crunchyroll/EpisodeExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using F23.StringSimilarity;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
Expand All @@ -13,6 +14,8 @@ namespace Wasari.Crunchyroll;

public static partial class EpisodeExtensions
{
private static readonly NormalizedLevenshtein Levenshtein = new NormalizedLevenshtein();

[GeneratedRegex("[a-zA-Z0-9 ]+")]
private static partial Regex EpisodeTitleNormalizeRegex();

Expand Down Expand Up @@ -156,28 +159,17 @@ private static WasariTvdbEpisode FindEpisodeByNormalizedWordProximity(IEnumerabl
var episodeName = episode.Title
.ToLowerInvariant()
.NormalizeUsingRegex();

var unmatchedEpisodeTitleWords = episodeName.Split(' ');

var possibleEpisodes = wasariApiEpisodes.Where(o => !o.Matched)
.Select(wasariEpisode =>

var possibleEpisodes = wasariApiEpisodes
.Where(o => !o.Matched)
.Select(i => new
{
var wasariEpisodeTitleWords = wasariEpisode.Name
Episode = i,
Distance = Levenshtein.Distance(i.Name
.ToLowerInvariant()
.NormalizeUsingRegex()
.Split(' ');

var matchedCount = unmatchedEpisodeTitleWords.Intersect(wasariEpisodeTitleWords).Count();
return new
{
Episode = wasariEpisode,
EpisodeTitle = wasariEpisode.Name,
UnmatchedEpisodeTitle = episode.Title,
MatchesTitleWords = matchedCount,
MatchPercentage = (double)matchedCount / wasariEpisodeTitleWords.Length
};
.NormalizeUsingRegex(), episodeName)
})
.OrderByDescending(i => i.MatchPercentage)
.OrderBy(i => i.Distance)
.Take(2)
.ToList();

Expand All @@ -188,11 +180,18 @@ private static WasariTvdbEpisode FindEpisodeByNormalizedWordProximity(IEnumerabl
.SingleOrDefault();
}

var delta = possibleEpisodes[0].MatchPercentage - possibleEpisodes[1].MatchPercentage;
if (delta > 0.1)
var delta = possibleEpisodes[1].Distance - possibleEpisodes[0].Distance;
if (possibleEpisodes[0].Distance < 0.5 && delta > 0.4)
{
return possibleEpisodes[0].Episode;
}

if(possibleEpisodes
.Where(i => i.Episode.SeasonNumber == episode.SeasonNumber && (i.Episode.Number == episode.EpisodeNumber || i.Episode.CalculatedAbsoluteNumber == episode.SequenceNumber))
.SingleOrDefaultIfMultiple() is {} ep)
{
return ep.Episode;
}

return default;
}
Expand Down
1 change: 1 addition & 0 deletions Wasari.Crunchyroll/Wasari.Crunchyroll.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="F23.StringSimilarity" Version="5.1.0" />
<PackageReference Include="FFMpegCore" Version="5.1.0" />
<PackageReference Include="Flurl" Version="3.0.7" />
<PackageReference Include="JsonExtensions" Version="1.2.0" />
Expand Down
10 changes: 5 additions & 5 deletions Wasari.Tests/EpisodeMatchesTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ private static IServiceProvider BuildServiceProvider()
)]
[DataRow(
"GRZXQJJ8Y" // The Ancient Magus' Bride
, 4
, 6
)]
[DataRow(
"GYEXQKJG6" // Dr. STONE
, 1
, 2
)]
[DataRow(
"GRDV0019R" // Jujutsu Kaisen
, 2
, 0
)]
[DataRow(
"G4PH0WXVJ" // SPY x FAMILY
Expand All @@ -73,10 +73,10 @@ public async Task MatchAllEpisodes(string seriesId, int expectedNonEnrichedCount

Assert.AreEqual(episodes.Length, enrichedEpisodes.Length);

var allEpisodesWereEnriched = enrichedEpisodes
var allEpisodesWereNotEnriched = enrichedEpisodes
.Where(i => !i.WasEnriched)
.ToList();

Assert.AreEqual(expectedNonEnrichedCount, allEpisodesWereEnriched.Count, "Not all expected episodes were enriched");
Assert.AreEqual(expectedNonEnrichedCount, allEpisodesWereNotEnriched.Count, "Not all expected episodes were enriched");
}
}
2 changes: 1 addition & 1 deletion Wasari.Tvdb.Abstractions/WasariTvdbEpisode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Wasari.Tvdb.Abstractions;

public record WasariTvdbEpisode(string Name, int? SeasonNumber, int? Number, bool IsMovie, string? Prefix)
public record WasariTvdbEpisode(int Id, string Name, int? SeasonNumber, int? Number, bool IsMovie, string? Prefix, string SeriesId, int? CalculatedAbsoluteNumber)
{
[JsonIgnore]
public bool Matched { get; set; }
Expand Down
26 changes: 21 additions & 5 deletions Wasari.Tvdb.Api/Services/TvdbEpisodesService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,31 @@ public async ValueTask<IResult> GetEpisodes(string query)
});

var series = tvdbSearchResponseSeries.Single();

var seriesWithEpisodes = await TvdbApi.GetSeriesAsync(series.TvdbId);

var currentEpiosdeNumber = 1;

return Results.Ok(seriesWithEpisodes.Data.Episodes
.Where(i => !string.IsNullOrEmpty(i.Name))
.Select(i => new WasariTvdbEpisode(i.Name, i.SeasonNumber, i.Number, i.IsMovie switch
.OrderBy(i => i.SeasonNumber)
.ThenBy(i => i.Number)
.Select(ep =>
{
0 => false,
1 => true,
_ => throw new ArgumentException("IsMovie flag is not 0 or 1")
}, i is { SeasonNumber: not null, Number: not null } ? $"S{i.SeasonNumber:00}E{i.Number:00}" : null)));
var episode = new WasariTvdbEpisode(ep.Id, ep.Name, ep.SeasonNumber, ep.Number, ep.IsMovie switch
{
0 => false,
1 => true,
_ => throw new ArgumentException("IsMovie flag is not 0 or 1")
}, ep is { SeasonNumber: not null, Number: not null } ? $"S{ep.SeasonNumber:00}E{ep.Number:00}" : null,
series.Id,
ep.SeasonNumber > 0 ? currentEpiosdeNumber : null);

if(ep.SeasonNumber > 0)
currentEpiosdeNumber++;

return episode;
})
);
}
}
3 changes: 3 additions & 0 deletions Wasari.Tvdb/ITvdbApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,7 @@ public interface ITvdbApi

[Get("/series/{id}/episodes/{seasonType}/{lang}")]
Task<TvdbResponse<TvdbSeries>> GetSeriesAsync(string id, string seasonType = "default", string lang = "eng", int page = 0);

[Get("/series/{id}/episodes/{seasonType}/{lang}")]
Task<string> GetSeriesRawAsync(string id, string seasonType = "default", string lang = "eng", int page = 0);
}
2 changes: 1 addition & 1 deletion Wasari.Tvdb/Models/TvdbEpisode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace Wasari.Tvdb.Models;

public record TvdbEpisode(
[property: JsonPropertyName("id")] int? Id,
[property: JsonPropertyName("id")] int Id,
[property: JsonPropertyName("seriesId")] int? SeriesId,
[property: JsonPropertyName("name")] string Name,
[property: JsonPropertyName("aired")] string Aired,
Expand Down

0 comments on commit b39e909

Please sign in to comment.