This commit is contained in:
Gilles Habran 2016-04-26 13:17:06 +02:00
commit d0170f518f
11 changed files with 66 additions and 85 deletions

View File

@ -1006,6 +1006,13 @@ class InfoExtractor(object):
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(), transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True): fatal=True):
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
if akamai_pv is not None and ';' in akamai_pv.text:
playerVerificationChallenge = akamai_pv.text.split(';')[0]
if playerVerificationChallenge.strip() != '':
return []
formats = [] formats = []
manifest_version = '1.0' manifest_version = '1.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')

View File

@ -23,7 +23,7 @@ class EaglePlatformIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# http://lenta.ru/news/2015/03/06/navalny/ # http://lenta.ru/news/2015/03/06/navalny/
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
'md5': '881ee8460e1b7735a8be938e2ffb362b', # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': { 'info_dict': {
'id': '227304', 'id': '227304',
'ext': 'mp4', 'ext': 'mp4',
@ -109,8 +109,11 @@ class EaglePlatformIE(InfoExtractor):
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
if mobj: if mobj:
http_format = m3u8_format.copy() http_format = m3u8_format.copy()
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
if not self._is_valid_url(video_url, video_id):
continue
http_format.update({ http_format.update({
'url': mp4_url.replace(mp4_url_basename, mobj.group(1)), 'url': video_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http', 'protocol': 'http',
}) })

View File

@ -439,7 +439,6 @@ from .mtv import (
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE from .musicplayon import MusicPlayOnIE
from .muzu import MuzuTVIE
from .mwave import MwaveIE from .mwave import MwaveIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE from .myspass import MySpassIE

View File

@ -887,6 +887,7 @@ class GenericIE(InfoExtractor):
# Eagle.Platform embed (generic URL) # Eagle.Platform embed (generic URL)
{ {
'url': 'http://lenta.ru/news/2015/03/06/navalny/', 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': { 'info_dict': {
'id': '227304', 'id': '227304',
'ext': 'mp4', 'ext': 'mp4',
@ -901,6 +902,7 @@ class GenericIE(InfoExtractor):
# ClipYou (Eagle.Platform) embed (custom URL) # ClipYou (Eagle.Platform) embed (custom URL)
{ {
'url': 'http://muz-tv.ru/play/7129/', 'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': { 'info_dict': {
'id': '12820', 'id': '12820',
'ext': 'mp4', 'ext': 'mp4',

View File

@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TESTS = [{ _TEST = {
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '0ff1a13aebb35d9bc14081ff633dd324', # MD5 is unstable
'info_dict': { 'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'id': '0NF1jJnxS1Wu3pHrmvFyw2',
'display_id': 'programa-144', 'display_id': 'programa-144',
@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor):
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913, 'duration': 2913,
}, },
}] }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)

View File

@ -1,63 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class MuzuTVIE(InfoExtractor):
    """Extractor for videos hosted on www.muzu.tv.

    Metadata is read from the site's oEmbed endpoint, and the media URL is
    obtained from the player API in two steps: ``playerInit`` reports which
    qualities exist, ``requestVideo`` returns the URL for the chosen one.
    """

    _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
    IE_NAME = 'muzu.tv'

    _TEST = {
        'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
        'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
        'info_dict': {
            'id': '1981454',
            'ext': 'mp4',
            'title': 'Cat Walk (Original Mix)',
            'description': 'md5:90e868994de201b2570e4e5854e19420',
            'uploader': 'MarcAshken featuring SOS',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the muzu.tv video at *url*.

        The title is mandatory (a missing title raises ``KeyError``), whereas
        thumbnail, description and uploader are optional and come back as
        ``None`` when the oEmbed response does not provide them.
        """
        video_id = self._match_id(url)

        info_data = compat_urllib_parse_urlencode({
            'format': 'json',
            'url': url,
        })
        info = self._download_json(
            'http://www.muzu.tv/api/oembed/?%s' % info_data,
            video_id, 'Downloading video info')

        player_info = self._download_json(
            'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
            video_id, 'Downloading player info')
        video_info = player_info['videos'][0]

        # Pick the first quality, best first, that the player info flags with
        # a truthy 'v<quality>' entry; fall back to the lowest one if none is.
        qualities = ('1080', '720', '480', '360')
        quality = next(
            (q for q in qualities if video_info.get('v%s' % q)),
            qualities[-1])

        data = compat_urllib_parse_urlencode({
            'ai': video_id,
            # Even if each time you watch a video the hash changes,
            # it seems to work for different videos, and it will work
            # even if you use any non empty string as a hash
            'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
            'device': 'web',
            'qv': quality,
        })
        video_url_info = self._download_json(
            'http://player.muzu.tv/player/requestVideo?%s' % data,
            video_id, 'Downloading video url')
        video_url = video_url_info['url']

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            # Optional metadata: a missing field must not abort extraction.
            'thumbnail': info.get('thumbnail_url'),
            'description': info.get('description'),
            'uploader': info.get('author_name'),
        }

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .screenwavemedia import ScreenwaveMediaIE
from ..utils import ( from ..utils import (
unified_strdate, unified_strdate,
@ -12,7 +13,6 @@ class NormalbootsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
_TEST = { _TEST = {
'url': 'http://normalboots.com/video/home-alone-games-jontron/', 'url': 'http://normalboots.com/video/home-alone-games-jontron/',
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
'info_dict': { 'info_dict': {
'id': 'home-alone-games-jontron', 'id': 'home-alone-games-jontron',
'ext': 'mp4', 'ext': 'mp4',
@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor):
'upload_date': '20140125', 'upload_date': '20140125',
}, },
'params': { 'params': {
# rtmp download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ScreenwaveMedia'],
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor):
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date', fatal=False)) webpage, 'date', fatal=False))
player_url = self._html_search_regex( screenwavemedia_url = self._html_search_regex(
r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
webpage, 'player url') group='url')
player_page = self._download_webpage(player_url, video_id)
video_url = self._html_search_regex(
r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
return { return {
'_type': 'url_transparent',
'id': video_id, 'id': video_id,
'url': video_url, 'url': screenwavemedia_url,
'ie_key': ScreenwaveMediaIE.ie_key(),
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),

View File

@ -23,7 +23,7 @@ class NRKIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.nrk.no/video/PS*150533', 'url': 'http://www.nrk.no/video/PS*150533',
'md5': 'bccd850baebefe23b56d708a113229c2', # MD5 is unstable
'info_dict': { 'info_dict': {
'id': '150533', 'id': '150533',
'ext': 'flv', 'ext': 'flv',
@ -34,7 +34,7 @@ class NRKIE(InfoExtractor):
}, },
{ {
'url': 'http://www.nrk.no/video/PS*154915', 'url': 'http://www.nrk.no/video/PS*154915',
'md5': '0b1493ba1aae7d9579a5ad5531bc395a', # MD5 is unstable
'info_dict': { 'info_dict': {
'id': '154915', 'id': '154915',
'ext': 'flv', 'ext': 'flv',

View File

@ -2,7 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unified_strdate, unified_strdate,
@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor):
'skip': 'Video has been blocked', 'skip': 'Video has been blocked',
}, { }, {
# metadataUrl # metadataUrl
'url': 'http://ok.ru/video/63567059965189-0', 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
'md5': '9676cf86eff5391d35dea675d224e131', 'md5': '9676cf86eff5391d35dea675d224e131',
'info_dict': { 'info_dict': {
'id': '63567059965189-0', 'id': '63567059965189-0',
@ -44,6 +48,7 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader': '☭ Андрей Мещанинов ☭', 'uploader': '☭ Андрей Мещанинов ☭',
'like_count': int, 'like_count': int,
'age_limit': 0, 'age_limit': 0,
'start_time': 5,
}, },
}, { }, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE) # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
@ -60,6 +65,22 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader': 'Алина П', 'uploader': 'Алина П',
'age_limit': 0, 'age_limit': 0,
}, },
}, {
# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
'url': 'http://ok.ru/video/62036049272859-0',
'info_dict': {
'id': '62036049272859-0',
'ext': 'mp4',
'title': 'МУЗЫКА ДОЖДЯ .',
'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
'upload_date': '20120106',
'uploader_id': '473534735899',
'uploader': 'МARINA D',
'age_limit': 0,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True, 'only_matching': True,
@ -78,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
@ -106,7 +130,14 @@ class OdnoklassnikiIE(InfoExtractor):
video_id, 'Downloading metadata JSON') video_id, 'Downloading metadata JSON')
movie = metadata['movie'] movie = metadata['movie']
title = movie['title']
# Some embedded videos may not contain title in movie dict (e.g.
# http://ok.ru/video/62036049272859-0) thus we allow missing title
# here and it's going to be extracted later by an extractor that
# will process the actual embed.
provider = metadata.get('provider')
title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
thumbnail = movie.get('poster') thumbnail = movie.get('poster')
duration = int_or_none(movie.get('duration')) duration = int_or_none(movie.get('duration'))
@ -135,9 +166,10 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'like_count': like_count, 'like_count': like_count,
'age_limit': age_limit, 'age_limit': age_limit,
'start_time': start_time,
} }
if metadata.get('provider') == 'USER_YOUTUBE': if provider == 'USER_YOUTUBE':
info.update({ info.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': movie['contentId'], 'url': movie['contentId'],

View File

@ -12,7 +12,7 @@ from ..utils import (
class ScreenwaveMediaIE(InfoExtractor): class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
_TESTS = [{ _TESTS = [{
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',

View File

@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor):
'format_id': format_id, 'format_id': format_id,
'quality': quality(format_id) 'quality': quality(format_id)
}) })
self._sort_formats(formats)
title = self._html_search_regex( title = self._html_search_regex(
r'<title>UTV - (.*?)</', webpage, 'title') r'<title>UTV - (.*?)</', webpage, 'title')