From fb72ec58ae2612590d661c9943fe6b2fa0864401 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 30 Jul 2015 17:34:38 +0100 Subject: [PATCH 1/9] [extractor/common] do not process f4m manifests that contain akamai playerVerificationChallenge --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dc5080504..e3d1dd076 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -856,6 +856,13 @@ class InfoExtractor(object): # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source) + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy + akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') + if akamai_pv is not None and ';' in akamai_pv.text: + playerVerificationChallenge = akamai_pv.text.split(';')[0] + if playerVerificationChallenge.strip() != '': + return [] + formats = [] manifest_version = '1.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') From abc1723edd03d38b256e012d465e3343064f5682 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 25 Apr 2016 22:24:40 +0800 Subject: [PATCH 2/9] [unistra] Sort formats Originally URLs are passed to set() and not sorted, so the result is not deterministic, causing occasional FAILs on Travis CI. 
--- youtube_dl/extractor/unistra.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 66d9f1bf3..a724cdbef 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id) }) + self._sort_formats(formats) title = self._html_search_regex( r'UTV - (.*?)</', webpage, 'title') From 2beff95da5fb28440d26a3dee5de575c792d133c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 22:26:19 +0800 Subject: [PATCH 3/9] [nrk] Comment out unstable MD5 checksums Both are Akamai f4f fragments. --- youtube_dl/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 9df200822..51dfc27ac 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -23,7 +23,7 @@ class NRKIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.nrk.no/video/PS*150533', - 'md5': 'bccd850baebefe23b56d708a113229c2', + # MD5 is unstable 'info_dict': { 'id': '150533', 'ext': 'flv', @@ -34,7 +34,7 @@ class NRKIE(InfoExtractor): }, { 'url': 'http://www.nrk.no/video/PS*154915', - 'md5': '0b1493ba1aae7d9579a5ad5531bc395a', + # MD5 is unstable 'info_dict': { 'id': '154915', 'ext': 'flv', From 6bdc2d5358c2843e3be4d073b2005e5196519664 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 22:27:25 +0800 Subject: [PATCH 4/9] [mitele] Comment out unstable MD5 Also Akamai f4f fragments --- youtube_dl/extractor/mitele.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 7b4581dc5..3589c223d 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' _VALID_URL = 
r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' - _TESTS = [{ + _TEST = { 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', - 'md5': '0ff1a13aebb35d9bc14081ff633dd324', + # MD5 is unstable 'info_dict': { 'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'display_id': 'programa-144', @@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor): 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, - }] + } def _real_extract(self, url): display_id = self._match_id(url) From 4645432d7a92bfb950571dde5dd690110e0f2284 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 22:48:17 +0800 Subject: [PATCH 5/9] [eagleplatform] Checking direct HTTP links Sometimes they fail with 404 --- youtube_dl/extractor/eagleplatform.py | 7 +++++-- youtube_dl/extractor/generic.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 0f8c73fd7..113a4966f 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -23,7 +23,7 @@ class EaglePlatformIE(InfoExtractor): _TESTS = [{ # http://lenta.ru/news/2015/03/06/navalny/ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - 'md5': '881ee8460e1b7735a8be938e2ffb362b', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -109,8 +109,11 @@ class EaglePlatformIE(InfoExtractor): mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) if mobj: http_format = m3u8_format.copy() + video_url = mp4_url.replace(mp4_url_basename, mobj.group(1)) + if not self._is_valid_url(video_url, video_id): + continue http_format.update({ - 'url': mp4_url.replace(mp4_url_basename, mobj.group(1)), + 'url': video_url, 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) diff --git 
a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c63bdbd08..a95501d86 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -887,6 +887,7 @@ class GenericIE(InfoExtractor): # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -901,6 +902,7 @@ class GenericIE(InfoExtractor): # ClipYou (Eagle.Platform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '12820', 'ext': 'mp4', From ad58942d57996f7f43601f22c85b8c6a9afe1b09 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 23:35:05 +0800 Subject: [PATCH 6/9] [muzu] Remove extractor MUZU is shutting down in October 2015. [1] [1] http://www.musicbusinessworldwide.com/youtube-rival-muzu-is-heading-into-liquidation/ --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/muzu.py | 63 ------------------------------ 2 files changed, 64 deletions(-) delete mode 100644 youtube_dl/extractor/muzu.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6de3438fc..8b215c5ab 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -439,7 +439,6 @@ from .mtv import ( ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE -from .muzu import MuzuTVIE from .mwave import MwaveIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py deleted file mode 100644 index cbc800481..000000000 --- a/youtube_dl/extractor/muzu.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import 
compat_urllib_parse_urlencode - - -class MuzuTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)' - IE_NAME = 'muzu.tv' - - _TEST = { - 'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', - 'md5': '98f8b2c7bc50578d6a0364fff2bfb000', - 'info_dict': { - 'id': '1981454', - 'ext': 'mp4', - 'title': 'Cat Walk (Original Mix)', - 'description': 'md5:90e868994de201b2570e4e5854e19420', - 'uploader': 'MarcAshken featuring SOS', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - info_data = compat_urllib_parse_urlencode({ - 'format': 'json', - 'url': url, - }) - info = self._download_json( - 'http://www.muzu.tv/api/oembed/?%s' % info_data, - video_id, 'Downloading video info') - - player_info = self._download_json( - 'http://player.muzu.tv/player/playerInit?ai=%s' % video_id, - video_id, 'Downloading player info') - video_info = player_info['videos'][0] - for quality in ['1080', '720', '480', '360']: - if video_info.get('v%s' % quality): - break - - data = compat_urllib_parse_urlencode({ - 'ai': video_id, - # Even if each time you watch a video the hash changes, - # it seems to work for different videos, and it will work - # even if you use any non empty string as a hash - 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', - 'device': 'web', - 'qv': quality, - }) - video_url_info = self._download_json( - 'http://player.muzu.tv/player/requestVideo?%s' % data, - video_id, 'Downloading video url') - video_url = video_url_info['url'] - - return { - 'id': video_id, - 'title': info['title'], - 'url': video_url, - 'thumbnail': info['thumbnail_url'], - 'description': info['description'], - 'uploader': info['author_name'], - } From e3de3d6f2f9c82683e76b6bc12697aa7264372ca Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 23:49:12 +0800 Subject: [PATCH 7/9] [normalboots] Fix extraction Now it's using ScreenwaveMedia --- 
youtube_dl/extractor/normalboots.py | 18 +++++++++--------- youtube_dl/extractor/screenwavemedia.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 77e091072..af44c3bb5 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .screenwavemedia import ScreenwaveMediaIE from ..utils import ( unified_strdate, @@ -12,7 +13,6 @@ class NormalbootsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' _TEST = { 'url': 'http://normalboots.com/video/home-alone-games-jontron/', - 'md5': '8bf6de238915dd501105b44ef5f1e0f6', 'info_dict': { 'id': 'home-alone-games-jontron', 'ext': 'mp4', @@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor): 'upload_date': '20140125', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, + 'add_ie': ['ScreenwaveMedia'], } def _real_extract(self, url): @@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor): r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', webpage, 'date', fatal=False)) - player_url = self._html_search_regex( - r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', - webpage, 'player url') - player_page = self._download_webpage(player_url, video_id) - video_url = self._html_search_regex( - r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file') + screenwavemedia_url = self._html_search_regex( + ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', + group='url') return { + '_type': 'url_transparent', 'id': video_id, - 'url': video_url, + 'url': screenwavemedia_url, + 'ie_key': ScreenwaveMediaIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git 
a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 44b0bbee6..40333c825 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -12,7 +12,7 @@ from ..utils import ( class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' _TESTS = [{ 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', From 749b0046a8664d023ff622dd38844f5c8632f3f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 25 Apr 2016 22:05:47 +0600 Subject: [PATCH 8/9] [ok] Allow embeds without title (Closes #9303) --- youtube_dl/extractor/odnoklassniki.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index f9e064a60..cd614f427 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -60,6 +60,22 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader': 'Алина П', 'age_limit': 0, }, + }, { + # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field) + 'url': 'http://ok.ru/video/62036049272859-0', + 'info_dict': { + 'id': '62036049272859-0', + 'ext': 'mp4', + 'title': 'МУЗЫКА ДОЖДЯ .', + 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0', + 'upload_date': '20120106', + 'uploader_id': '473534735899', + 'uploader': 'МARINA D', + 'age_limit': 0, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -106,7 
+122,14 @@ class OdnoklassnikiIE(InfoExtractor): video_id, 'Downloading metadata JSON') movie = metadata['movie'] - title = movie['title'] + + # Some embedded videos may not contain title in movie dict (e.g. + # http://ok.ru/video/62036049272859-0) thus we allow missing title + # here and it's going to be extracted later by an extractor that + # will process the actual embed. + provider = metadata.get('provider') + title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title') + thumbnail = movie.get('poster') duration = int_or_none(movie.get('duration')) @@ -137,7 +160,7 @@ class OdnoklassnikiIE(InfoExtractor): 'age_limit': age_limit, } - if metadata.get('provider') == 'USER_YOUTUBE': + if provider == 'USER_YOUTUBE': info.update({ '_type': 'url_transparent', 'url': movie['contentId'], From c9fd5306709d0c03487a3b0163b7a33cab6774aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 25 Apr 2016 22:15:15 +0600 Subject: [PATCH 9/9] [ok] Extract start time --- youtube_dl/extractor/odnoklassniki.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index cd614f427..986708e75 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, unified_strdate, @@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor): 'skip': 'Video has been blocked', }, { # metadataUrl - 'url': 'http://ok.ru/video/63567059965189-0', + 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5', 'md5': '9676cf86eff5391d35dea675d224e131', 'info_dict': { 'id': '63567059965189-0', @@ -44,6 +48,7 @@ class 
OdnoklassnikiIE(InfoExtractor): 'uploader': '☭ Андрей Мещанинов ☭', 'like_count': int, 'age_limit': 0, + 'start_time': 5, }, }, { # YouTube embed (metadataUrl, provider == USER_YOUTUBE) @@ -94,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor): }] def _real_extract(self, url): + start_time = int_or_none(compat_parse_qs( + compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) + video_id = self._match_id(url) webpage = self._download_webpage( @@ -158,6 +166,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': uploader_id, 'like_count': like_count, 'age_limit': age_limit, + 'start_time': start_time, } if provider == 'USER_YOUTUBE':