Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Extractor! #33

Merged
merged 5 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mediaflow_proxy/extractors/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from mediaflow_proxy.extractors.uqload import UqloadExtractor
from mediaflow_proxy.extractors.streamtape import StreamtapeExtractor
from mediaflow_proxy.extractors.supervideo import SupervideoExtractor
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor



Expand All @@ -20,6 +21,7 @@ class ExtractorFactory:
"Mixdrop": MixdropExtractor,
"Streamtape": StreamtapeExtractor,
"Supervideo": SupervideoExtractor,
"VixCloud": VixCloudExtractor,
"LiveTV": LiveTVExtractor,
}

Expand Down
71 changes: 71 additions & 0 deletions mediaflow_proxy/extractors/vixcloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import re
from typing import Dict, Any
from bs4 import BeautifulSoup, SoupStrainer
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
import json
from urllib.parse import urlparse, parse_qs


class VixCloudExtractor(BaseExtractor):
"""VixCloud URL extractor."""

def __init__(self, *args, **kwargs):
    """Forward all arguments to the base extractor and set the endpoint name."""
    super().__init__(*args, **kwargs)
    # Endpoint name later returned by extract() under "mediaflow_endpoint".
    self.mediaflow_endpoint = "hls_manifest_proxy"

async def version(self, domain: str) -> str:
    """Get version of VixCloud Parent Site.

    Requests ``https://streamingcommunity.{domain}/richiedi-un-titolo`` and
    reads the ``version`` key from the JSON stored in the ``data-page``
    attribute of the page's ``<div id="app">`` element.

    Args:
        domain: Suffix interpolated into ``streamingcommunity.{domain}``.

    Returns:
        The value stored under ``"version"`` in the page data.

    Raises:
        ExtractorError: If the response status is not 200, or if the app
            container, its ``data-page`` attribute, or the ``version`` key
            is missing or cannot be decoded.
    """
    site = f"https://streamingcommunity.{domain}"
    response = await self._make_request(
        f"{site}/richiedi-un-titolo",
        headers={
            "Referer": f"{site}/",
            "Origin": site,
        },
    )
    if response.status_code != 200:
        raise ExtractorError("Outdated Domain")

    # Restrict parsing to the <div id="app"> element; nothing else is read.
    soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
    app = soup.find("div", {"id": "app"})
    if app is None:
        # Fail explicitly rather than falling through and returning None.
        raise ExtractorError("Failed to parse version: app container not found")

    try:
        # A missing data-page attribute gives None, which json.loads rejects
        # with TypeError; catch it so callers only ever see ExtractorError.
        return json.loads(app.get("data-page"))["version"]
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        raise ExtractorError(f"Failed to parse version: {e}") from e

async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Extract Vixcloud URL."""
domain = url.split("://")[1].split("/")[0].split(".")[1]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Make domain extraction more robust.

The current domain extraction is fragile and assumes a specific URL format. Consider using urlparse for more reliable domain extraction.

-        domain = url.split("://")[1].split("/")[0].split(".")[1]
+        parsed = urlparse(url)
+        domain = parsed.netloc.split(".")[-1]

Committable suggestion skipped: line range outside the PR's diff.

version = await self.version(domain)
response = await self._make_request(url, headers={"x-inertia": "true", "x-inertia-version": version})
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("iframe"))
iframe = soup.find("iframe").get("src")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add error handling for iframe extraction.

The iframe extraction could fail if no iframe is found, leading to an AttributeError.

-        iframe = soup.find("iframe").get("src")
+        iframe_element = soup.find("iframe")
+        if not iframe_element:
+            raise ExtractorError("No iframe found in the response")
+        iframe = iframe_element.get("src")
+        if not iframe:
+            raise ExtractorError("Iframe source URL not found")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
iframe = soup.find("iframe").get("src")
iframe_element = soup.find("iframe")
if not iframe_element:
raise ExtractorError("No iframe found in the response")
iframe = iframe_element.get("src")
if not iframe:
raise ExtractorError("Iframe source URL not found")

parsed_url = urlparse(iframe)
query_params = parse_qs(parsed_url.query)
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})

if response.status_code != 200:
raise ExtractorError("Failed to extract URL components, Invalid Request")
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
if soup:
script = soup.find("body").find("script").text
token = re.search(r"'token':\s*'(\w+)'", script).group(1)
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
vixid = iframe.split("/embed/")[1].split("?")[0]
base_url = iframe.split("://")[1].split("/")[0]
final_url = f"https://{base_url}/playlist/{vixid}.m3u8?token={token}&expires={expires}"
Comment on lines +57 to +59
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Make URL parsing more robust.

The URL parsing for vixid and base_url is fragile. Use urlparse consistently for more reliable URL handling.

-            vixid = iframe.split("/embed/")[1].split("?")[0]
-            base_url = iframe.split("://")[1].split("/")[0]
+            iframe_parsed = urlparse(iframe)
+            vixid = iframe_parsed.path.split("/embed/")[1]
+            base_url = iframe_parsed.netloc

Committable suggestion skipped: line range outside the PR's diff.

if "canPlayFHD" in query_params:
# canPlayFHD = "h=1"
final_url += "&h=1"
if "b" in query_params:
# b = "b=1"
final_url += "&b=1"
self.base_headers["referer"] = url
return {
"destination_url": final_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
2 changes: 1 addition & 1 deletion mediaflow_proxy/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class MPDSegmentParams(GenericParams):


class ExtractorURLParams(GenericParams):
host: Literal["Doodstream", "Mixdrop", "Uqload", "Streamtape", "Supervideo", "LiveTV"] = Field(
host: Literal["Doodstream", "Mixdrop", "Uqload", "Streamtape", "Supervideo", "VixCloud", "LiveTV"] = Field(
..., description="The host to extract the URL from."
)
destination: str = Field(..., description="The URL of the stream.", alias="d")
Expand Down
Loading