This commit is contained in:
Gilles Habran 2016-04-26 13:17:06 +02:00
commit d0170f518f
11 changed files with 66 additions and 85 deletions

View File

@ -1006,6 +1006,13 @@ class InfoExtractor(object):
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True):
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
if akamai_pv is not None and ';' in akamai_pv.text:
playerVerificationChallenge = akamai_pv.text.split(';')[0]
if playerVerificationChallenge.strip() != '':
return []
formats = []
manifest_version = '1.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')

View File

@ -23,7 +23,7 @@ class EaglePlatformIE(InfoExtractor):
_TESTS = [{
# http://lenta.ru/news/2015/03/06/navalny/
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
'md5': '881ee8460e1b7735a8be938e2ffb362b',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '227304',
'ext': 'mp4',
@ -109,8 +109,11 @@ class EaglePlatformIE(InfoExtractor):
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
if mobj:
http_format = m3u8_format.copy()
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
if not self._is_valid_url(video_url, video_id):
continue
http_format.update({
'url': mp4_url.replace(mp4_url_basename, mobj.group(1)),
'url': video_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})

View File

@ -439,7 +439,6 @@ from .mtv import (
)
from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE
from .muzu import MuzuTVIE
from .mwave import MwaveIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE

View File

@ -887,6 +887,7 @@ class GenericIE(InfoExtractor):
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '227304',
'ext': 'mp4',
@ -901,6 +902,7 @@ class GenericIE(InfoExtractor):
# ClipYou (Eagle.Platform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '12820',
'ext': 'mp4',

View File

@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
_TESTS = [{
_TEST = {
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '0ff1a13aebb35d9bc14081ff633dd324',
# MD5 is unstable
'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
'display_id': 'programa-144',
@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor):
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913,
},
}]
}
def _real_extract(self, url):
display_id = self._match_id(url)

View File

@ -1,63 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class MuzuTVIE(InfoExtractor):
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
IE_NAME = 'muzu.tv'
_TEST = {
'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
'info_dict': {
'id': '1981454',
'ext': 'mp4',
'title': 'Cat Walk (Original Mix)',
'description': 'md5:90e868994de201b2570e4e5854e19420',
'uploader': 'MarcAshken featuring SOS',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
info_data = compat_urllib_parse_urlencode({
'format': 'json',
'url': url,
})
info = self._download_json(
'http://www.muzu.tv/api/oembed/?%s' % info_data,
video_id, 'Downloading video info')
player_info = self._download_json(
'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
video_id, 'Downloading player info')
video_info = player_info['videos'][0]
for quality in ['1080', '720', '480', '360']:
if video_info.get('v%s' % quality):
break
data = compat_urllib_parse_urlencode({
'ai': video_id,
# Even if each time you watch a video the hash changes,
# it seems to work for different videos, and it will work
# even if you use any non empty string as a hash
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
'device': 'web',
'qv': quality,
})
video_url_info = self._download_json(
'http://player.muzu.tv/player/requestVideo?%s' % data,
video_id, 'Downloading video url')
video_url = video_url_info['url']
return {
'id': video_id,
'title': info['title'],
'url': video_url,
'thumbnail': info['thumbnail_url'],
'description': info['description'],
'uploader': info['author_name'],
}

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .screenwavemedia import ScreenwaveMediaIE
from ..utils import (
unified_strdate,
@ -12,7 +13,6 @@ class NormalbootsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
_TEST = {
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
'info_dict': {
'id': 'home-alone-games-jontron',
'ext': 'mp4',
@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor):
'upload_date': '20140125',
},
'params': {
# rtmp download
# m3u8 download
'skip_download': True,
},
'add_ie': ['ScreenwaveMedia'],
}
def _real_extract(self, url):
@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor):
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date', fatal=False))
player_url = self._html_search_regex(
r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
webpage, 'player url')
player_page = self._download_webpage(player_url, video_id)
video_url = self._html_search_regex(
r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
screenwavemedia_url = self._html_search_regex(
ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
group='url')
return {
'_type': 'url_transparent',
'id': video_id,
'url': video_url,
'url': screenwavemedia_url,
'ie_key': ScreenwaveMediaIE.ie_key(),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),

View File

@ -23,7 +23,7 @@ class NRKIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.nrk.no/video/PS*150533',
'md5': 'bccd850baebefe23b56d708a113229c2',
# MD5 is unstable
'info_dict': {
'id': '150533',
'ext': 'flv',
@ -34,7 +34,7 @@ class NRKIE(InfoExtractor):
},
{
'url': 'http://www.nrk.no/video/PS*154915',
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
# MD5 is unstable
'info_dict': {
'id': '154915',
'ext': 'flv',

View File

@ -2,7 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
unified_strdate,
@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor):
'skip': 'Video has been blocked',
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0',
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
'md5': '9676cf86eff5391d35dea675d224e131',
'info_dict': {
'id': '63567059965189-0',
@ -44,6 +48,7 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader': '☭ Андрей Мещанинов ☭',
'like_count': int,
'age_limit': 0,
'start_time': 5,
},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
@ -60,6 +65,22 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader': 'Алина П',
'age_limit': 0,
},
}, {
# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
'url': 'http://ok.ru/video/62036049272859-0',
'info_dict': {
'id': '62036049272859-0',
'ext': 'mp4',
'title': 'МУЗЫКА ДОЖДЯ .',
'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
'upload_date': '20120106',
'uploader_id': '473534735899',
'uploader': 'МARINA D',
'age_limit': 0,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True,
@ -78,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor):
}]
def _real_extract(self, url):
start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
video_id = self._match_id(url)
webpage = self._download_webpage(
@ -106,7 +130,14 @@ class OdnoklassnikiIE(InfoExtractor):
video_id, 'Downloading metadata JSON')
movie = metadata['movie']
title = movie['title']
# Some embedded videos may not contain title in movie dict (e.g.
# http://ok.ru/video/62036049272859-0) thus we allow missing title
# here and it's going to be extracted later by an extractor that
# will process the actual embed.
provider = metadata.get('provider')
title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
thumbnail = movie.get('poster')
duration = int_or_none(movie.get('duration'))
@ -135,9 +166,10 @@ class OdnoklassnikiIE(InfoExtractor):
'uploader_id': uploader_id,
'like_count': like_count,
'age_limit': age_limit,
'start_time': start_time,
}
if metadata.get('provider') == 'USER_YOUTUBE':
if provider == 'USER_YOUTUBE':
info.update({
'_type': 'url_transparent',
'url': movie['contentId'],

View File

@ -12,7 +12,7 @@ from ..utils import (
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
_VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
_TESTS = [{
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',

View File

@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor):
'format_id': format_id,
'quality': quality(format_id)
})
self._sort_formats(formats)
title = self._html_search_regex(
r'<title>UTV - (.*?)</', webpage, 'title')