From 1ed2c4b37889446ebfbb1cecca0f1b880066eb4f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 30 Nov 2019 23:21:13 +0100 Subject: [PATCH 1/4] [ooyala] add better fallback values for domain and streams variables --- youtube_dl/extractor/ooyala.py | 103 +++++++++++++++++---------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 995b24d1b..eb957b8fe 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,12 +1,12 @@ from __future__ import unicode_literals +import base64 import re from .common import InfoExtractor from ..compat import ( compat_b64decode, compat_str, - compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -21,9 +21,9 @@ from ..utils import ( class OoyalaBaseIE(InfoExtractor): _PLAYER_BASE = 'http://player.ooyala.com/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' - _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
+ _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s' - def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None): + def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -31,59 +31,62 @@ class OoyalaBaseIE(InfoExtractor): title = metadata['title'] auth_data = self._download_json( - self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) - + compat_urllib_parse_urlencode({ - 'domain': domain, + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code), + video_id, headers=self.geo_verification_headers(), query={ + 'domain': domain or 'player.ooyala.com', 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', 'embedToken': embed_token, - }), video_id, headers=self.geo_verification_headers()) - - cur_auth_data = auth_data['authorization_data'][embed_code] + })['authorization_data'][embed_code] urls = [] formats = [] - if cur_auth_data['authorized']: - for stream in cur_auth_data['streams']: - url_data = try_get(stream, lambda x: x['url']['data'], compat_str) - if not url_data: - continue - s_url = compat_b64decode(url_data).decode('utf-8') - if not s_url or s_url in urls: - continue - urls.append(s_url) - ext = determine_ext(s_url, None) - delivery_type = stream.get('delivery_type') - if delivery_type == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or ext == 'f4m': - formats.extend(self._extract_f4m_formats( - s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif delivery_type == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - s_url, embed_code, 
mpd_id='dash', fatal=False)) - elif delivery_type == 'smooth': - self._extract_ism_formats( - s_url, embed_code, ism_id='mss', fatal=False) - elif ext == 'smil': - formats.extend(self._extract_smil_formats( - s_url, embed_code, fatal=False)) - else: - formats.append({ - 'url': s_url, - 'ext': ext or delivery_type, - 'vcodec': stream.get('video_codec'), - 'format_id': delivery_type, - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - 'fps': float_or_none(stream.get('framerate')), - }) - else: + streams = auth_data.get('streams') or [{ + 'delivery_type': 'hls', + 'url': { + 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(), + } + }] + for stream in streams: + url_data = try_get(stream, lambda x: x['url']['data'], compat_str) + if not url_data: + continue + s_url = compat_b64decode(url_data).decode('utf-8') + if not s_url or s_url in urls: + continue + urls.append(s_url) + ext = determine_ext(s_url, None) + delivery_type = stream.get('delivery_type') + if delivery_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif delivery_type == 'hds' or ext == 'f4m': + formats.extend(self._extract_f4m_formats( + s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif delivery_type == 'dash' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + s_url, embed_code, mpd_id='dash', fatal=False)) + elif delivery_type == 'smooth': + self._extract_ism_formats( + s_url, embed_code, ism_id='mss', fatal=False) + elif ext == 'smil': + formats.extend(self._extract_smil_formats( + s_url, embed_code, fatal=False)) + else: + formats.append({ + 'url': s_url, + 'ext': ext or delivery_type, + 'vcodec': stream.get('video_codec'), + 
'format_id': delivery_type, + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + 'fps': float_or_none(stream.get('framerate')), + }) + if not formats and not auth_data.get('authorized'): raise ExtractorError('%s said: %s' % ( - self.IE_NAME, cur_auth_data['message']), expected=True) + self.IE_NAME, auth_data['message']), expected=True) self._sort_formats(formats) subtitles = {} From ddfe50195b525a4dd4b4fa6755e4e630a25fcbef Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 30 Nov 2019 23:48:26 +0100 Subject: [PATCH 2/4] [nintendo] fix extraction and partially add support for Nintendo Direct videos(#4592) --- youtube_dl/extractor/nintendo.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nintendo.py b/youtube_dl/extractor/nintendo.py index 4b4e66b05..ff8f70ba6 100644 --- a/youtube_dl/extractor/nintendo.py +++ b/youtube_dl/extractor/nintendo.py @@ -5,13 +5,12 @@ import re from .common import InfoExtractor from .ooyala import OoyalaIE -from ..utils import unescapeHTML class NintendoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:games/detail|nintendo-direct)/(?P<id>[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj', + 'url': 'https://www.nintendo.com/games/detail/duck-hunt-wii-u/', 'info_dict': { 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW', 'ext': 'flv', @@ -28,7 +27,19 @@ class NintendoIE(InfoExtractor): 'id': 'tokyo-mirage-sessions-fe-wii-u', 'title': 'Tokyo Mirage Sessions ♯FE', }, - 'playlist_count': 3, + 'playlist_count': 4, + }, { + 'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/', + 'info_dict': { + 'id': 'J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V', + 'ext': 'mp4', + 'title':
'Switch_ROS_ND0904-H264.mov', + 'duration': 2324.758, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], }] def _real_extract(self, url): @@ -39,8 +50,11 @@ class NintendoIE(InfoExtractor): entries = [ OoyalaIE._build_url_result(m.group('code')) for m in re.finditer( - r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2', - webpage)] + r'data-(?:video-id|directVideoId)=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage)] + + title = self._html_search_regex( + r'(?s)<(?:span|div)[^>]+class="(?:title|wrapper)"[^>]*>.*?<h1>(.+?)</h1>', + webpage, 'title', fatal=False) return self.playlist_result( - entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False))) + entries, page_id, title) From 376528447652a6b19e60e837ff632024aec82ad1 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 30 Nov 2019 23:49:45 +0100 Subject: [PATCH 3/4] [teachingchannel] fix extraction --- youtube_dl/extractor/teachingchannel.py | 26 ++++++++++++------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e89759714..624cdb3ad 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -1,35 +1,33 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from .ooyala import OoyalaIE class TeachingChannelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' + _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)' _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', - 'md5': '3d6361864d7cac20b57c8784da17166f', 'info_dict': { - 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', + 'id': '3swwlzkT', 'ext': 'mp4', 'title': 'A History of Teaming', 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', - 'duration': 422.255, + 'duration': 422, + 'upload_date': '20170316', + 'timestamp': 1489691297, }, 'params': { 'skip_download': True, }, - 'add_ie': ['Ooyala'], + 'add_ie': ['JWPlatform'], } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - title = mobj.group('title') - webpage = self._download_webpage(url, title) - ooyala_code = self._search_regex( - r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code') + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + mid = self._search_regex( + r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})', + webpage, 'media id') - return
OoyalaIE._build_url_result(ooyala_code) + return self.url_result('jwplatform:' + mid, 'JWPlatform', mid) From 12cc89122d1b4e30d4f5a99b2fc0b440217a1693 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 30 Nov 2019 23:50:28 +0100 Subject: [PATCH 4/4] [nrl] fix extraction --- youtube_dl/extractor/nrl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrl.py b/youtube_dl/extractor/nrl.py index 798b91e04..22a2df8d3 100644 --- a/youtube_dl/extractor/nrl.py +++ b/youtube_dl/extractor/nrl.py @@ -23,8 +23,8 @@ class NRLTVIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - q_data = self._parse_json(self._search_regex( - r"(?s)q-data='({.+?})'", webpage, 'player data'), display_id) + q_data = self._parse_json(self._html_search_regex( + r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id) ooyala_id = q_data['videoId'] return self.url_result( 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))