diff --git a/scanner/matcher/matcher.py b/scanner/matcher/matcher.py index 73f5f41a2..c4835f6f7 100644 --- a/scanner/matcher/matcher.py +++ b/scanner/matcher/matcher.py @@ -51,15 +51,10 @@ async def _identify(self, path: str): if "mimetype" not in raw or not raw["mimetype"].startswith("video"): return - # Remove seasons in "One Piece (1999) 152.mkv" for example - if raw.get("season") == raw.get("year") and "season" in raw: - del raw["season"] logger.info("Identied %s: %s", path, raw) title = raw.get("title") - if isinstance(title, list): - title = title[0] if not isinstance(title, str): raise ProviderError(f"Could not guess title, found: {title}") diff --git a/scanner/matcher/parser/guess.py b/scanner/matcher/parser/guess.py index 431f6f457..54b96a0f7 100644 --- a/scanner/matcher/parser/guess.py +++ b/scanner/matcher/parser/guess.py @@ -7,7 +7,7 @@ sys.path.append(str(Path(f"{__file__}/../../..").resolve())) from guessit.api import default_api -from typing import cast, List +from typing import cast, List, Any from rebulk import Rebulk try: @@ -20,14 +20,15 @@ rblk.rules(rules) -def guessit(name: str, *, xem_titles: List[str] = []): +def guessit(name: str, *, xem_titles: List[str] = [], extra_flags: dict[str, Any] = {}): return default_api.guessit( name, { "episode_prefer_number": True, "excludes": "language", "xem_titles": xem_titles, - }, + } + | extra_flags, ) @@ -44,7 +45,11 @@ async def main(): async with ClientSession() as client: xem = TheXemClient(client) - ret = guessit(sys.argv[1], xem_titles=await xem.get_expected_titles()) + ret = guessit( + sys.argv[1], + xem_titles=await xem.get_expected_titles(), + # extra_flags={"advanced": True}, + ) print(json.dumps(ret, cls=GuessitEncoder, indent=4)) asyncio.run(main()) diff --git a/scanner/matcher/parser/rules.py b/scanner/matcher/parser/rules.py index 6faeb5c50..d7fb05171 100644 --- a/scanner/matcher/parser/rules.py +++ b/scanner/matcher/parser/rules.py @@ -1,5 +1,6 @@ # Read that for examples/rules: 
https://github.com/pymedusa/Medusa/blob/master/medusa/name_parser/rules/rules.py +from logging import getLogger from typing import Any, List, Optional, cast from rebulk import Rule, RemoveMatch, AppendMatch, POST_PROCESS from rebulk.match import Matches, Match @@ -7,6 +8,68 @@ from providers.implementations.thexem import clean +logger = getLogger(__name__) + + +class UnlistTitles(Rule): + """Join titles to a single string instead of a list + + Example: '/media/series/Demon Slayer - Kimetsu no Yaiba/Season 4/Demon Slayer - Kimetsu no Yaiba - S04E10 - Love Hashira Mitsuri Kanroji WEBDL-1080p.mkv' + Default: + ```json + { + "title": [ + "Demon Slayer", + "Kimetsu no Yaiba" + ], + "season": 4, + "episode_title": "Demon Slayer", + "alternative_title": "Kimetsu no Yaiba", + "episode": 10, + "source": "Web", + "screen_size": "1080p", + "container": "mkv", + "mimetype": "video/x-matroska", + "type": "episode" + } + ``` + Expected: + ```json + { + "title": "Demon Slayer - Kimetsu no Yaiba", + "season": 4, + "episode_title": "Demon Slayer", + "alternative_title": "Kimetsu no Yaiba", + "episode": 10, + "source": "Web", + "screen_size": "1080p", + "container": "mkv", + "mimetype": "video/x-matroska", + "type": "episode" + } + ``` + """ + + priority = POST_PROCESS + consequence = [RemoveMatch, AppendMatch] + + def when(self, matches: Matches, context) -> Any: + titles: List[Match] = matches.named("title") # type: ignore + + if not titles or len(titles) <= 1: + return + + title = copy(titles[0]) + for nmatch in titles[1:]: + # Check if titles are next to each other, if they are not ignore it. 
+ next: List[Match] = matches.next(title) # type: ignore + if not next or next[0] != nmatch: + logger.warn(f"Ignoring potential part of title: {nmatch.value}") + continue + title.end = nmatch.end + + return [titles, [title]] + class EpisodeTitlePromotion(Rule): """Promote "episode_title" to "episode" when the title is in fact the episode number @@ -15,19 +78,19 @@ class EpisodeTitlePromotion(Rule): Default: ```json { - "release_group": "Erai-raws", - "title": "Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e", - "season": 3, - "episode_title": "05", + "release_group": "Erai-raws", + "title": "Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e", + "season": 3, + "episode_title": "05", } ``` Expected: ```json { - "release_group": "Erai-raws", - "title": "Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e", - "season": 3, - "episode": 5, + "release_group": "Erai-raws", + "title": "Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e", + "season": 3, + "episode": 5, } ``` """ @@ -58,22 +121,22 @@ class TitleNumberFixup(Rule): Default: ```json { - "release_group": "Erai-raws", - "title": "Zom", - "episode": [ - 100, - 1 - ], - "episode_title": "Zombie ni Naru made ni Shitai", + "release_group": "Erai-raws", + "title": "Zom", + "episode": [ + 100, + 1 + ], + "episode_title": "Zombie ni Naru made ni Shitai", } ``` Expected: ```json { - "release_group": "Erai-raws", - "title": "Zom 100", - "episode": 1, - "episode_title": "Zombie ni Naru made ni Shitai 100 no Koto", + "release_group": "Erai-raws", + "title": "Zom 100", + "episode": 1, + "episode_title": "Zombie ni Naru made ni Shitai 100 no Koto", } ``` """ @@ -126,24 +189,24 @@ class MultipleSeasonRule(Rule): Default: ```json { - "title": "Spy x Family", - "season": [ - 2, - 3, - 4, - 5, - 6, - 7, - 8 - ], + "title": "Spy x Family", + "season": [ + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ], } ``` Expected: ```json { - "title": "Spy x Family", - "season": 2, - "episode": 8, + "title": "Spy x Family", + "season": 2, + "episode": 8, } ``` """ 
@@ -198,16 +261,16 @@ class XemFixup(Rule): Default: ```json { - "title": "JoJo's Bizarre Adventure", - "alternative_title": "Diamond is Unbreakable", - "episode": 12, + "title": "JoJo's Bizarre Adventure", + "alternative_title": "Diamond is Unbreakable", + "episode": 12, } ``` Expected: ```json { - "title": "JoJo's Bizarre Adventure - Diamond is Unbreakable", - "episode": 12, + "title": "JoJo's Bizarre Adventure - Diamond is Unbreakable", + "episode": 12, } ``` @@ -216,16 +279,16 @@ class XemFixup(Rule): Default: ```json { - "title": "Owarimonogatari", - "season": 2, - "episode": 15 + "title": "Owarimonogatari", + "season": 2, + "episode": 15 } ``` Expected: ```json { - "title": "Owarimonogatari S2", - "episode": 15 + "title": "Owarimonogatari S2", + "episode": 15 } ``` """ @@ -253,3 +316,42 @@ def when(self, matches: Matches, context) -> Any: if clean(new_title.value) in context["xem_titles"]: return [[title, nmatch[0]], [new_title]] + + +class SeasonYearDedup(Rule): + """Remove "season" when it's the same as "year" + + Example: "One Piece (1999) 152.mkv" + Default: + ```json + { + "title": "One Piece", + "year": 1999, + "season": 1999, + "episode": 152, + "container": "mkv", + "mimetype": "video/x-matroska", + "type": "episode" + } + ``` + Expected: + ```json + { + "title": "One Piece", + "year": 1999, + "episode": 152, + "container": "mkv", + "mimetype": "video/x-matroska", + "type": "episode" + } + ``` + """ + + priority = POST_PROCESS + consequence = [RemoveMatch] + + def when(self, matches: Matches, context) -> Any: + season: List[Match] = matches.named("season") # type: ignore + year: List[Match] = matches.named("year") # type: ignore + if len(season) == 1 and len(year) == 1 and season[0].value == year[0].value: + return [season] diff --git a/shell.nix b/shell.nix index 6f92b2eb6..406c8321b 100644 --- a/shell.nix +++ b/shell.nix @@ -1,5 +1,5 @@ {pkgs ? 
import <nixpkgs> {}}: let - python = pkgs.python311.withPackages (ps: + python = pkgs.python312.withPackages (ps: with ps; [ guessit aiohttp