diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 02cd2c003..a285ee7d8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1006,6 +1006,13 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True): + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy + akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') + if akamai_pv is not None and ';' in akamai_pv.text: + playerVerificationChallenge = akamai_pv.text.split(';')[0] + if playerVerificationChallenge.strip() != '': + return [] + formats = [] manifest_version = '1.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 0f8c73fd7..113a4966f 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -23,7 +23,7 @@ class EaglePlatformIE(InfoExtractor): _TESTS = [{ # http://lenta.ru/news/2015/03/06/navalny/ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - 'md5': '881ee8460e1b7735a8be938e2ffb362b', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -109,8 +109,11 @@ class EaglePlatformIE(InfoExtractor): mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) if mobj: http_format = m3u8_format.copy() + video_url = mp4_url.replace(mp4_url_basename, mobj.group(1)) + if not self._is_valid_url(video_url, video_id): + continue http_format.update({ - 'url': mp4_url.replace(mp4_url_basename, mobj.group(1)), + 'url': video_url, 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6de3438fc..8b215c5ab 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -439,7 +439,6 @@ from .mtv import ( ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE -from .muzu import MuzuTVIE from .mwave import MwaveIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c63bdbd08..a95501d86 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -887,6 +887,7 @@ class GenericIE(InfoExtractor): # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -901,6 +902,7 @@ class GenericIE(InfoExtractor): # ClipYou (Eagle.Platform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '12820', 'ext': 'mp4', diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 7b4581dc5..3589c223d 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P[^/]+)/' - _TESTS = [{ + _TEST = { 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', - 'md5': '0ff1a13aebb35d9bc14081ff633dd324', + # MD5 is unstable 'info_dict': { 'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'display_id': 'programa-144', @@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor): 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, - }] + } def _real_extract(self, url): display_id = self._match_id(url) diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py deleted file mode 100644 index cbc800481..000000000 --- a/youtube_dl/extractor/muzu.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode - - -class MuzuTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P\d+)' - IE_NAME = 'muzu.tv' - - _TEST = { - 'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', - 'md5': '98f8b2c7bc50578d6a0364fff2bfb000', - 'info_dict': { - 'id': '1981454', - 'ext': 'mp4', - 'title': 'Cat Walk (Original Mix)', - 'description': 'md5:90e868994de201b2570e4e5854e19420', - 'uploader': 'MarcAshken featuring SOS', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - info_data = compat_urllib_parse_urlencode({ - 'format': 'json', - 'url': url, - }) - info = self._download_json( - 'http://www.muzu.tv/api/oembed/?%s' % info_data, - video_id, 'Downloading video info') - - player_info = self._download_json( - 'http://player.muzu.tv/player/playerInit?ai=%s' % video_id, - video_id, 'Downloading player info') - video_info = player_info['videos'][0] - for quality in ['1080', '720', '480', '360']: - if video_info.get('v%s' % quality): - break - - data = compat_urllib_parse_urlencode({ - 'ai': video_id, - # Even if each time you watch a video the hash changes, - # it seems to work for different videos, and it will work - # even if you use any non empty string as a hash - 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', - 'device': 'web', - 'qv': quality, - }) - video_url_info = self._download_json( - 'http://player.muzu.tv/player/requestVideo?%s' % data, - video_id, 'Downloading video url') - video_url = video_url_info['url'] - - return { - 'id': video_id, - 'title': info['title'], - 'url': video_url, - 'thumbnail': info['thumbnail_url'], - 'description': info['description'], - 'uploader': info['author_name'], - } diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 77e091072..af44c3bb5 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .screenwavemedia import ScreenwaveMediaIE from ..utils import ( unified_strdate, @@ -12,7 +13,6 @@ class NormalbootsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P[0-9a-z-]*)/?$' _TEST = { 'url': 'http://normalboots.com/video/home-alone-games-jontron/', - 'md5': '8bf6de238915dd501105b44ef5f1e0f6', 'info_dict': { 'id': 'home-alone-games-jontron', 'ext': 'mp4', @@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor): 'upload_date': '20140125', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, + 'add_ie': ['ScreenwaveMedia'], } def _real_extract(self, url): @@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor): r'[A-Za-z]+, (?P.*)', webpage, 'date', fatal=False)) - player_url = self._html_search_regex( - r'[\S]+)"', - webpage, 'player url') - player_page = self._download_webpage(player_url, video_id) - video_url = self._html_search_regex( - r"file:\s'(?P[^']+\.mp4)'", player_page, 'file') + screenwavemedia_url = self._html_search_regex( + ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', + group='url') return { + '_type': 'url_transparent', 'id': video_id, - 'url': video_url, + 'url': screenwavemedia_url, + 'ie_key': ScreenwaveMediaIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 9df200822..51dfc27ac 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -23,7 +23,7 @@ class NRKIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.nrk.no/video/PS*150533', - 'md5': 'bccd850baebefe23b56d708a113229c2', + # MD5 is unstable 'info_dict': { 'id': '150533', 'ext': 'flv', @@ -34,7 +34,7 @@ class NRKIE(InfoExtractor): }, { 'url': 'http://www.nrk.no/video/PS*154915', - 'md5': '0b1493ba1aae7d9579a5ad5531bc395a', + # MD5 is unstable 'info_dict': { 'id': '154915', 'ext': 'flv', diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index f9e064a60..986708e75 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, unified_strdate, @@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor): 'skip': 'Video has been blocked', }, { # metadataUrl - 'url': 'http://ok.ru/video/63567059965189-0', + 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5', 'md5': '9676cf86eff5391d35dea675d224e131', 'info_dict': { 'id': '63567059965189-0', @@ -44,6 +48,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader': '☭ Андрей Мещанинов ☭', 'like_count': int, 'age_limit': 0, + 'start_time': 5, }, }, { # YouTube embed (metadataUrl, provider == USER_YOUTUBE) @@ -60,6 +65,22 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader': 'Алина П', 'age_limit': 0, }, + }, { + # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field) + 'url': 'http://ok.ru/video/62036049272859-0', + 'info_dict': { + 'id': '62036049272859-0', + 'ext': 'mp4', + 'title': 'МУЗЫКА ДОЖДЯ .', + 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0', + 'upload_date': '20120106', + 'uploader_id': '473534735899', + 'uploader': 'МARINA D', + 'age_limit': 0, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -78,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor): }] def _real_extract(self, url): + start_time = int_or_none(compat_parse_qs( + compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) + video_id = self._match_id(url) webpage = self._download_webpage( @@ -106,7 +130,14 @@ class OdnoklassnikiIE(InfoExtractor): video_id, 'Downloading metadata JSON') movie = metadata['movie'] - title = movie['title'] + + # Some embedded videos may not contain title in movie dict (e.g. + # http://ok.ru/video/62036049272859-0) thus we allow missing title + # here and it's going to be extracted later by an extractor that + # will process the actual embed. + provider = metadata.get('provider') + title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title') + thumbnail = movie.get('poster') duration = int_or_none(movie.get('duration')) @@ -135,9 +166,10 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': uploader_id, 'like_count': like_count, 'age_limit': age_limit, + 'start_time': start_time, } - if metadata.get('provider') == 'USER_YOUTUBE': + if provider == 'USER_YOUTUBE': info.update({ '_type': 'url_transparent', 'url': movie['contentId'], diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 44b0bbee6..40333c825 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -12,7 +12,7 @@ from ..utils import ( class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P[A-Za-z0-9-]+)' + _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P[A-Za-z0-9-]+)' EMBED_PATTERN = r'src=(["\'])(?P(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' _TESTS = [{ 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 66d9f1bf3..a724cdbef 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id) }) + self._sort_formats(formats) title = self._html_search_regex( r'UTV - (.*?)</', webpage, 'title')