Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fork Sync #193

Merged
merged 3 commits into from
Dec 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,7 @@
from .sky import (
SkyNewsIE,
SkySportsIE,
SkySportsNewsIE,
)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
Expand Down
101 changes: 78 additions & 23 deletions youtube_dl/extractor/sky.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
extract_attributes,
Expand All @@ -11,36 +13,59 @@


class SkyBaseIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = extract_attributes(self._search_regex(
r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
webpage, 'video data'))

video_url = 'ooyala:%s' % video_data['data-video-id']
if video_data.get('data-token-required') == 'true':
token_fetch_options = self._parse_json(video_data.get(
'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
token_fetch_url = token_fetch_options.get('url')
if token_fetch_url:
embed_token = self._download_webpage(urljoin(
url, token_fetch_url), video_id, fatal=False)
if embed_token:
video_url = smuggle_url(
video_url, {'embed_token': embed_token.strip('"')})
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
_SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'

def _process_ooyala_element(self, webpage, sdc_el, url):
sdc = extract_attributes(sdc_el)
provider = sdc.get('data-provider')
if provider == 'ooyala':
video_id = sdc['data-sdc-video-id']
video_url = 'ooyala:%s' % video_id
ie_key = 'Ooyala'
ooyala_el = self._search_regex(
r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id,
webpage, 'video data', fatal=False)
if ooyala_el:
ooyala_attrs = extract_attributes(ooyala_el) or {}
if ooyala_attrs.get('data-token-required') == 'true':
token_fetch_url = (self._parse_json(ooyala_attrs.get(
'data-token-fetch-options', '{}'),
video_id, fatal=False) or {}).get('url')
if token_fetch_url:
embed_token = self._download_json(urljoin(
url, token_fetch_url), video_id, fatal=False)
if embed_token:
video_url = smuggle_url(
video_url, {'embed_token': embed_token})
elif provider == 'brightcove':
video_id = sdc['data-video-id']
account_id = sdc.get('data-account-id') or '6058004172001'
player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
ie_key = 'BrightcoveNew'

return {
'_type': 'url_transparent',
'id': video_id,
'url': video_url,
'ie_key': ie_key,
}

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = self._process_ooyala_element(webpage, self._search_regex(
self._SDC_EL_REGEX, webpage, 'sdc element'), url)
info.update({
'title': self._og_search_title(webpage),
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',
}
})
return info


class SkySportsIE(SkyBaseIE):
IE_NAME = 'sky:sports'
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
Expand All @@ -62,15 +87,45 @@ class SkySportsIE(SkyBaseIE):


class SkyNewsIE(SkyBaseIE):
IE_NAME = 'sky:news'
_VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
_TEST = {
'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
'md5': 'd6327e581473cea9976a3236ded370cd',
'md5': '411e8893fd216c75eaf7e4c65d364115',
'info_dict': {
'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
'ext': 'mp4',
'title': 'Russian plane inspected after deadly fire',
'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
'uploader_id': '6058004172001',
'timestamp': 1567112345,
'upload_date': '20190829',
},
'add_ie': ['Ooyala'],
'add_ie': ['BrightcoveNew'],
}


class SkySportsNewsIE(SkyBaseIE):
IE_NAME = 'sky:sports:news'
_VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass',
'info_dict': {
'id': '10871916',
'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass',
'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.',
},
'playlist_count': 2,
}

def _real_extract(self, url):
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)

entries = []
for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
entries.append(self._process_ooyala_element(webpage, sdc_el, url))

return self.playlist_result(
entries, article_id, self._og_search_title(webpage),
self._html_search_meta(['og:description', 'description'], webpage))
7 changes: 3 additions & 4 deletions youtube_dl/extractor/vvvvid.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ def f(m):
embed_code = ds(embed_code)
video_type = video_data.get('video_type')
if video_type in ('video/rcs', 'video/kenc'):
embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
if video_type == 'video/kenc':
kenc = self._download_json(
'https://www.vvvvid.it/kenc', video_id, query={
Expand All @@ -163,9 +162,7 @@ def f(m):
kenc_message = kenc.get('message')
if kenc_message:
embed_code += '?' + ds(kenc_message)
formats.extend(self._extract_m3u8_formats(
embed_code, video_id, 'mp4',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_akamai_formats(embed_code, video_id))
else:
formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
Expand Down Expand Up @@ -217,6 +214,8 @@ def _real_extract(self, url):
season_number = int_or_none(season.get('number'))
episodes = season.get('episodes') or []
for episode in episodes:
if episode.get('playable') is False:
continue
season_id = str_or_none(episode.get('season_id'))
video_id = str_or_none(episode.get('video_id'))
if not (season_id and video_id):
Expand Down
4 changes: 2 additions & 2 deletions youtube_dl/extractor/yandexvideo.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _real_extract(self, url):
video_id = self._match_id(url)

player = try_get((self._download_json(
'https://frontend.vh.yandex.ru/graphql', video_id, data=b'''{
'https://frontend.vh.yandex.ru/graphql', video_id, data=('''{
player(content_id: "%s") {
computed_title
content_url
Expand All @@ -86,7 +86,7 @@ def _real_extract(self, url):
title
views_count
}
}''' % video_id.encode(), fatal=False)), lambda x: x['player']['content'])
}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content'])
if not player or player.get('error'):
player = self._download_json(
'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
Expand Down