Skip to content

Commit

Permalink
Added more refined logic to parser module.
Browse files Browse the repository at this point in the history
  • Loading branch information
Spoked authored and Spoked committed Jan 8, 2024
1 parent a98e84b commit ec7f550
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 5 deletions.
1 change: 1 addition & 0 deletions backend/program/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from utils.service_manager import ServiceManager
from utils.settings import settings_manager as settings
from utils.logger import logger
from utils.parser import parser
from .torrentio import Torrentio
from .orionoid import Orionoid
from .jackett import Jackett
Expand Down
1 change: 1 addition & 0 deletions backend/program/scrapers/jackett.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def api_scrape(self, item):
infohash = attr.get('@value')
if parser.parse(title) and infohash:
data[infohash] = {"name": title}
# TODO: Sort data using parser and user preferences
if len(data) > 0:
return data
return {}
Expand Down
1 change: 1 addition & 0 deletions backend/program/scrapers/orionoid.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def api_scrape(self, item):
infoHash = stream.file.hash
if parser.parse(title) and infoHash:
data[infoHash] = {"name": title}
# TODO: Sort data using parser and user preferences
if len(data) > 0:
return data
return {}
6 changes: 4 additions & 2 deletions backend/program/scrapers/torrentio.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,12 @@ def api_scrape(self, item):
if response.is_ok:
data = {}
for stream in response.data.streams:
if parser.parse(stream.title):
title = stream.title.split("\n👤")[0]
if parser.parse(title):
data[stream.infoHash] = {
"name": stream.title.split("\n👤")[0],
"name": title,
}
# TODO: Sort data using parser and user preferences
if len(data) > 0:
return data
return {}
9 changes: 8 additions & 1 deletion backend/utils/default_settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,12 @@
"enabled": false,
"url": "http://localhost:9117"
}
}
},
"parser": {
"language": ["English"],
"include_4k": false,
"highest_quality": false,
"dual_audio": true,
"av1_audio": false
}
}
90 changes: 88 additions & 2 deletions backend/utils/parser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,54 @@
import re
import PTN
from typing import List
from pydantic import BaseModel
from utils.settings import settings_manager


class ParserConfig(BaseModel):
    """User-configurable options for the title parser.

    Built by ``Parser.__init__`` from the ``"parser"`` section returned by
    ``settings_manager.get("parser")``.
    """

    # Languages the user wants; read by Parser.__init__.
    language: List[str]
    # When true, "2160p" and "4K" are added to the accepted resolutions.
    include_4k: bool
    # When true, 4K/UHD resolutions and extra audio formats are accepted.
    highest_quality: bool
    # When true, "Dual" is added to the accepted audio values.
    # This sometimes doesn't work, depending on whether other audio is in the title.
    dual_audio: bool
    # When false, "AV1" is added to the unwanted codecs.
    av1_audio: bool

class Parser:

def __init__(self):
    """Load the parser settings and build the accept/reject tables.

    The ``"parser"`` section from ``settings_manager`` is validated through
    ``ParserConfig``; the tables set here are then adjusted by
    ``validate_settings`` according to those settings.
    """
    self.settings = ParserConfig(**settings_manager.get("parser"))
    # Use the configured languages. A hard-coded ["English"] previously
    # overwrote this value, so the user's setting was silently ignored.
    self.language = self.settings.language
    self.resolution = ["1080p", "720p"]
    self.unwanted_codec = ["H.265 Main 10", "H.265", "H.263", "Xvid"]  # Bad for transcoding
    self.quality = [None, "Blu-ray", "WEB-DL", "WEBRip", "HDRip",
                    "HDTVRip", "BDRip", "Pay-Per-View Rip"]
    self.unwanted_quality = ["Cam", "Telesync", "Telecine", "Screener",
                             "DVDSCR", "Workprint", "DVD-Rip", "TVRip",
                             "VODRip", "DVD-R", "DSRip", "BRRip"]
    self.audio = [None, "AAC", "AAC 2.0", "FLAC", "Custom"]
    self.network = ["Apple TV+", "Amazon Studios", "Netflix",
                    "Nickelodeon", "YouTube Premium", "Disney Plus",
                    "DisneyNOW", "HBO Max", "HBO", "Hulu Networks",
                    "DC Universe", "Adult Swim", "Comedy Central",
                    "Peacock", "AMC", "PBS", "Crunchyroll"]
    self.validate_settings()

def validate_settings(self):
if self.settings.include_4k or self.settings.highest_quality:
self.resolution += ["2160p", "4K"]
if self.settings.highest_quality:
self.resolution += ["UHD"]
self.audio += ["Dolby TrueHD", "Dolby Atmos",
"Dolby Digital EX", "Dolby Digital Plus",
"Dolby Digital Plus 5.1", "Dolby Digital Plus 7.1"
"DTS-HD MA", "DTS-HD MA", "DTS-HD",
"DTS-EX", "DTS:X", "DTS", "5.1", "7.1"]
self.unwanted_codec -= ["H.265 Main 10", "H.265"]
if self.settings.dual_audio:
self.audio += ["Dual"]
if not self.settings.av1_audio:
self.unwanted_codec += ["AV1"] # Not all devices support this
# if self.settings.low_resolution:
# self.resolution += ["480p", "360p"] # This needs work. Should check item.year as well?

def _parse(self, string):
parse = PTN.parse(string)
Expand All @@ -20,10 +64,16 @@ def _parse(self, string):
episodes.append(int(episode))

season = parse.get("season")

audio = parse.get("audio")
resolution = parse.get("resolution")
quality = parse.get("quality")
subtitles = parse.get("subtitles")
language = parse.get("language")
hdr = parse.get("hdr")
remastered = parse.get("remastered")
proper = parse.get("proper")
repack = parse.get("repack")
remux = parse.get("remux")
if not language:
language = "English"
extended = parse.get("extended")
Expand All @@ -32,7 +82,14 @@ def _parse(self, string):
"episodes": episodes or [],
"resolution": resolution or [],
"quality": quality or [],
"audio": audio or None,
"hdr": hdr or None,
"remastered": remastered or None,
"proper": proper or None,
"repack": repack or None,
"subtitles": subtitles or [],
"language": language or [],
"remux": remux or None,
"extended": extended,
"season": season,
}
Expand All @@ -47,11 +104,40 @@ def episodes_in_season(self, season, string):
return parse["episodes"]
return []

def sort_dual_audio(self, string):
"""Check if content has dual audio."""
# TODO: This could use improvement.. untested.
parse = self._parse(string)
return parse["audio"] == "Dual" or re.search(r"((dual.audio)|(english|eng)\W+(dub|audio))", string, flags=re.IGNORECASE)

def remove_unwanted(self, string):
"""Filter out unwanted content."""
# TODO: This could use improvement.. untested.
parse = self._parse(string)
return not any([
parse["quality"] in self.unwanted_quality,
parse["codec"] in self.unwanted_codec
])

def sort_and_filter_streams(self, streams: dict) -> dict:
"""Sorts and filters streams based on user preferences"""
# TODO: Sort scraped data based on user preferences
# instead of scraping one item at a time.
filtered_sorted_streams = []
for info_hash, stream_details in streams.items():
title = stream_details.get("name", "")
if self.remove_unwanted(title):
filtered_sorted_streams.append((info_hash, stream_details, self.has_dual_audio(title)))
filtered_sorted_streams.sort(key=lambda x: x[2], reverse=True)
sorted_data = {info_hash: details for info_hash, details, _ in filtered_sorted_streams}
return sorted_data

def parse(self, string):
parse = self._parse(string)
return (
parse["resolution"] in self.resolution
and parse["language"] in self.language
and not parse["quality"] in self.unwanted_quality
)

# Shared module-level Parser instance; the scrapers package imports it with
# `from utils.parser import parser`. Constructing it reads the "parser"
# settings section at import time.
parser = Parser()

0 comments on commit ec7f550

Please sign in to comment.