Skip to content

Commit

Permalink
Parse rewrite (#128)
Browse files Browse the repository at this point in the history
* Move parser to its own module

* Add ORIGIN to env vars

* Fix overseerr, watchlist, jackett validation.

* Added more refined logic to parser module.

* Set stage for testing

* Add methods for individual checks

* Update sort logic

* Update default settings

* Fix jackett. Begin to add title support for jackett.

---------

Co-authored-by: Spoked <Spoked@localhost>
Co-authored-by: Dreu LaVelle <dreu.lavelle@localhost>
  • Loading branch information
3 people authored and AyushSehrawat committed Jan 15, 2024
1 parent e25bbb0 commit a7708f9
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 36 deletions.
2 changes: 1 addition & 1 deletion backend/program/content/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, media_items):
self.initialized = False
self.key = "content"
self.running = False
self.sm = ServiceManager(media_items, False, Mdblist, Overseerr, PlexWatchlist)
self.sm = ServiceManager(media_items, False, Overseerr, Mdblist, PlexWatchlist)
if not self.validate():
logger.error("You have no content services enabled, please enable at least one!")
return
Expand Down
11 changes: 11 additions & 0 deletions backend/program/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from utils.service_manager import ServiceManager
from utils.settings import settings_manager as settings
from utils.logger import logger
from utils.parser import parser
from .torrentio import Torrentio
from .orionoid import Orionoid
from .jackett import Jackett
Expand Down Expand Up @@ -55,3 +56,13 @@ def _needs_new_scrape(self, item) -> bool:
> scrape_time
or item.scraped_times == 0
)
def _check_for_title_match(self, item, string) -> bool:
    """Tell whether the title parsed out of ``string`` equals the item's title.

    The title is extracted with ``parser.get_title``. Movies are compared
    against their own title, seasons against their parent's title and
    episodes against their grandparent's title. Any other item type never
    matches.
    """
    parsed_title = parser.get_title(string)
    kind = item.type
    if kind == "movie":
        expected = item.title
    elif kind == "season":
        expected = item.parent.title
    elif kind == "episode":
        expected = item.parent.parent.title
    else:
        return False
    return parsed_title == expected
35 changes: 15 additions & 20 deletions backend/program/scrapers/jackett.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, _):
if not self.initialized or not self.api_key:
return
self.minute_limiter = RateLimiter(max_calls=60, period=60, raise_on_limit=True)
self.second_limiter = RateLimiter(max_calls=1, period=1)
self.second_limiter = RateLimiter(max_calls=1, period=3)
logger.info("Jackett initialized!")

def validate_settings(self) -> bool:
Expand All @@ -35,7 +35,7 @@ def validate_settings(self) -> bool:
try:
url = f"{self.settings.url}/api/v2.0/server/config"
response = get(url=url, retry_if_failed=False, timeout=60)
if response.is_ok:
if response.is_ok and response.data.api_key is not None:
self.api_key = response.data.api_key
return True
except ReadTimeout:
Expand Down Expand Up @@ -79,22 +79,17 @@ def api_scrape(self, item):
url = (
f"{self.settings.url}/api/v2.0/indexers/all/results/torznab?apikey={self.api_key}{query}"
)
try:
with self.second_limiter:
response = get(url=url, retry_if_failed=False, timeout=60)
if response.is_ok:
data = {}
if not hasattr(response.data['rss']['channel'], "item"):
return {}
for stream in response.data['rss']['channel']['item']:
title = stream.get('title')
for attr in stream.get('torznab:attr', []):
if attr.get('@name') == 'infohash':
infohash = attr.get('@value')
if parser.parse(title) and infohash:
data[infohash] = {"name": title}
if len(data) > 0:
return data
return {}
except ReadTimeout:
logger.debug("Jackett timed out for %s", item.log_string)
return {}
if response.is_ok:
data = {}
for stream in response.data['rss']['channel']['item']:
title = stream.get('title')
for attr in stream.get('torznab:attr', []):
if attr.get('@name') == 'infohash':
infohash = attr.get('@value')
if parser.parse(title) and infohash:
data[infohash] = {"name": title}
if len(data) > 0:
return parser.sort_streams(data)
return {}
9 changes: 4 additions & 5 deletions backend/program/scrapers/orionoid.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,8 @@ def api_scrape(self, item):
data = {}
for stream in response.data.data.streams:
title = stream.file.name
infoHash = stream.file.hash
if parser.parse(title) and infoHash:
data[infoHash] = {"name": title}
if parser.parse(title) and stream.file.hash:
data[stream.file.hash] = {"name": title}
if len(data) > 0:
return data
return {}
return parser.sort_streams(data)
return {}
9 changes: 6 additions & 3 deletions backend/program/scrapers/torrentio.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,14 @@ def api_scrape(self, item):
response = get(f"{url}.json", retry_if_failed=False)
if response.is_ok:
data = {}
if len(response.data.streams) == 0:
return data
for stream in response.data.streams:
if parser.parse(stream.title):
title = stream.title.split("\n👤")[0]
if parser.parse(title):
data[stream.infoHash] = {
"name": stream.title.split("\n👤")[0],
"name": title,
}
if len(data) > 0:
return data
return parser.sort_streams(data)
return {}
8 changes: 8 additions & 0 deletions backend/utils/default_settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,13 @@
"enabled": false,
"url": "http://localhost:9117"
}
},
"parser": {
"language": ["English"],
"include_4k": false,
"highest_quality": false,
"repack_proper": true,
"dual_audio": true,
"av1_audio": true
}
}
144 changes: 137 additions & 7 deletions backend/utils/parser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,58 @@
import re
import PTN
from typing import List
from pydantic import BaseModel
from utils.settings import settings_manager


# Typed model for the "parser" section of the settings; Parser.__init__
# builds it from settings_manager.get("parser").
class ParserConfig(BaseModel):
# Languages configured for releases; Parser falls back to ["English"] when empty.
language: List[str]
# When True (and highest_quality is False), "2160p" and "4K" join the accepted resolutions.
include_4k: bool
# When True, accepted resolutions grow to UHD/2160p/4K and Dolby/DTS audio formats are accepted.
highest_quality: bool
# When True, releases flagged as proper or repack are ranked higher by sort_streams.
repack_proper: bool
dual_audio: bool # This sometimes doesn't work, depending on whether other audio is named in the title
# When False, "AV1" is added to the unwanted codec list.
av1_audio: bool


class Parser:

def __init__(self):
    """Load the parser settings and build the release filter lists.

    Reads the ``parser`` settings section through ``settings_manager``,
    sets up the baseline accept/reject lists and finally calls
    ``validate_settings`` so the lists reflect the user's choices.
    """
    self.settings = ParserConfig(**settings_manager.get("parser"))
    # Honour the configured languages; English is only a fallback for an
    # empty list. (A second, unconditional assignment of ["English"] used
    # to follow and silently discarded the configured value.)
    self.language = self.settings.language or ["English"]
    # Baseline only: validate_settings() assigns the final resolution list.
    self.resolution = ["1080p", "720p"]
    self.unwanted_codec = ["H.263", "Xvid"]  # Bad for transcoding
    self.unwanted_quality = ["Cam", "Telesync", "Telecine", "Screener",
                             "DVDSCR", "Workprint", "DVD-Rip", "TVRip",
                             "VODRip", "DVD-R", "DSRip", "BRRip"]
    # None is listed so that a release without the tag can be accepted.
    self.quality = [None, "Blu-ray", "WEB-DL", "WEBRip", "HDRip",
                    "HDTVRip", "BDRip", "Pay-Per-View Rip"]
    self.audio = [None, "AAC", "AAC 2.0", "AAC 5.1", "FLAC", "AVC", "Custom"]
    self.network = ["Apple TV+", "Amazon Studios", "Netflix",
                    "Nickelodeon", "YouTube Premium", "Disney Plus",
                    "DisneyNOW", "HBO Max", "HBO", "Hulu Networks",
                    "DC Universe", "Adult Swim", "Comedy Central",
                    "Peacock", "AMC", "PBS", "Crunchyroll",
                    "Syndication", "Hallmark", "BBC", "VICE",
                    "MSNBC", "Crave"]  # Will probably be used later in `Versions`
    self.validate_settings()

def validate_settings(self):
    """Adapt the resolution, audio and codec lists to the user settings.

    * ``highest_quality``: accept UHD/2160p/4K on top of 1080p/720p and
      add the Dolby/DTS audio formats to the accepted audio list.
    * ``include_4k`` (only when ``highest_quality`` is off): accept
      2160p/4K on top of 1080p/720p.
    * otherwise: accept 1080p/720p only.
    * ``dual_audio``: accept the ``"Dual"`` audio tag.
    * ``av1_audio`` off: treat the AV1 codec as unwanted.
    """
    if self.settings.highest_quality:
        self.resolution = ["UHD", "2160p", "4K", "1080p", "720p"]
        # Every entry needs its own trailing comma: two adjacent string
        # literals are silently concatenated into one bogus entry.
        self.audio += [
            "Dolby TrueHD", "Dolby Atmos",
            "Dolby Digital EX", "Dolby Digital Plus",
            "Dolby Digital 5.1", "Dolby Digital 7.1",
            "Dolby Digital Plus 5.1", "Dolby Digital Plus 7.1",
            "DTS-HD MA", "DTS-HD", "DTS-HD MA 5.1",
            "DTS-EX", "DTS:X", "DTS", "5.1", "7.1",
        ]
    elif self.settings.include_4k:
        self.resolution = ["2160p", "4K", "1080p", "720p"]
    else:
        self.resolution = ["1080p", "720p"]
    if self.settings.dual_audio:
        self.audio += ["Dual"]
    if not self.settings.av1_audio:
        self.unwanted_codec += ["AV1"]  # Not all devices support this codec

def _parse(self, string):
parse = PTN.parse(string)
Expand All @@ -19,39 +67,121 @@ def _parse(self, string):
else:
episodes.append(int(episode))

title = parse.get("title")
season = parse.get("season")

audio = parse.get("audio")
codec = parse.get("codec")
resolution = parse.get("resolution")
quality = parse.get("quality")
subtitles = parse.get("subtitles")
language = parse.get("language")
hdr = parse.get("hdr")
upscaled = parse.get("upscaled")
remastered = parse.get("remastered")
proper = parse.get("proper")
repack = parse.get("repack")
remux = parse.get("remux")
if not language:
language = "English"
extended = parse.get("extended")

return {
"episodes": episodes or [],
"title": title,
"resolution": resolution or [],
"quality": quality or [],
"season": season,
"episodes": episodes or [],
"codec": codec or [],
"audio": audio or [],
"hdr": hdr or False,
"upscaled": upscaled or False,
"remastered": remastered or False,
"proper": proper or False,
"repack": repack or False,
"subtitles": True if subtitles == "Available" else False,
"language": language or [],
"remux": remux or False,
"extended": extended,
"season": season,
}

def episodes(self, string):
def episodes(self, string) -> List[int]:
    """Return the episode numbers that ``_parse`` finds in ``string``."""
    return self._parse(string)["episodes"]

def episodes_in_season(self, season, string):
def episodes_in_season(self, season, string) -> List[int]:
    """Return the parsed episode numbers when ``string`` belongs to ``season``.

    An empty list is returned when the season parsed from ``string`` differs
    from ``season``.
    """
    parsed = self._parse(string)
    return parsed["episodes"] if parsed["season"] == season else []

def parse(self, string):
def _is_4k(self, string) -> bool:
"""Check if content is `4k`."""
if self.settings.include_4k:
parsed = self._parse(string)
return parsed.get("resolution", False) in ["2160p", "4K"]

def _is_highest_quality(self, string) -> bool:
"""Check if content is `highest quality`."""
if self.settings.highest_quality:
parsed = self._parse(string)
return any([
parsed.get("hdr", False),
parsed.get("remux", False),
parsed.get("audio", False) in self.audio,
parsed.get("resolution", False) in ["UHD", "2160p", "4K"],
parsed.get("upscaled", False)
])

def _is_repack_or_proper(self, string) -> bool:
"""Check if content is `repack` or `proper`."""
if self.settings.repack_proper:
parsed = self._parse(string)
return any([
parsed.get("proper", False),
parsed.get("repack", False),
])

def _is_dual_audio(self, string) -> bool:
"""Check if content is `dual audio`."""
if self.settings.dual_audio:
parsed = self._parse(string)
return parsed.get("audio") == "Dual" or \
re.search(r"((dual.audio)|(english|eng)\W+(dub|audio))", string, flags=re.IGNORECASE) is not None

def _is_network(self, string) -> bool:
"""Check if content is from a `network`."""
parsed = self._parse(string)
return parsed.get("network", False) in self.network

def sort_streams(self, streams: dict) -> dict:
    """Return ``streams`` reordered so the most preferred releases come first.

    Each stream is ranked by a tuple of checks on its ``name``, in priority
    order: dual audio, repack/proper, highest quality, 4k, network. Streams
    with equal rank keep their original relative order.
    """
    def preference(entry):
        title = entry[1]['name']
        return (
            self._is_dual_audio(title),
            self._is_repack_or_proper(title),
            self._is_highest_quality(title),
            self._is_4k(title),
            self._is_network(title),
        )

    ranked = list(streams.items())
    ranked.sort(key=preference, reverse=True)
    return dict(ranked)

def parse(self, string) -> bool:
    """Parse the given string and return True if it matches the user settings.

    A release is accepted when its resolution, language and audio are in the
    accepted lists and neither its quality nor its codec is in the unwanted
    lists.
    """
    parsed = self._parse(string)
    # `_parse` reports a missing audio tag as an empty list, while
    # `self.audio` marks "no audio tag" with None. Map the empty value back
    # to None so untagged releases are not rejected outright.
    audio = parsed["audio"] or None
    return (
        parsed["resolution"] in self.resolution
        and parsed["language"] in self.language
        and audio in self.audio
        and parsed["quality"] not in self.unwanted_quality
        and parsed["codec"] not in self.unwanted_codec
    )

def get_title(self, string) -> str:
    """Return the ``title`` entry that ``_parse`` extracts from ``string``."""
    return self._parse(string)["title"]

# Shared module-level Parser instance; the scrapers package imports it with
# `from utils.parser import parser`. It is built when this module is imported.
parser = Parser()

0 comments on commit a7708f9

Please sign in to comment.