commit 733e0ed943
Author: gillouche
Date:   2016-04-30 13:50:58 +02:00

27 changed files with 544 additions and 373 deletions


@ -698,6 +698,10 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number`, either on the command line or in a configuration file. Remove the latter if present.
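If you want title-based naming, express it entirely through the output template instead of combining `-o` with `-t`. As a minimal sketch using youtube-dl's Python embedding API (the `outtmpl` option is the library-side counterpart of `-o`; the URL is the one used in the embedding example elsewhere in this README):

```python
import youtube_dl

# Put the title (and anything else you need) directly in the template instead
# of combining -o with -t/--title or -A/--auto-number.
ydl_opts = {'outtmpl': '%(title)s-%(id)s.%(ext)s'}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```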
### Do I always have to pass `-citw`?
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
@ -718,7 +722,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
### I have downloaded a video but how can I play it?
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.


@ -63,7 +63,7 @@ class ArteTvIE(InfoExtractor):
class ArteTVPlus7IE(InfoExtractor): class ArteTVPlus7IE(InfoExtractor):
IE_NAME = 'arte.tv:+7' IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&+])' _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
@classmethod @classmethod
def _extract_url_info(cls, url): def _extract_url_info(cls, url):
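For context on the `_VALID_URL` change above: the old pattern had `+` inside the character class, so the `name` group matched exactly one character; moving it outside makes it a quantifier. A quick illustration with a made-up slug:

```python
import re

old = r'(?P<name>[^/?#&+])'   # '+' inside the class: matches a single character
new = r'(?P<name>[^/?#&]+)'   # '+' as a quantifier: matches the whole slug
slug = 'tracks-musique-apocalypse'
print(re.match(old, slug).group('name'))  # 't'
print(re.match(new, slug).group('name'))  # 'tracks-musique-apocalypse'
```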


@ -19,7 +19,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec and videoraj.ch' _IE_DESC = 'cloudy.ec and videoraj.ch'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/ https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
(?:v/|embed\.php\?id=) (?:v/|embed\.php\?id=)
(?P<id>[A-Za-z0-9]+) (?P<id>[A-Za-z0-9]+)
''' '''
@ -37,7 +37,7 @@ class CloudyIE(InfoExtractor):
} }
}, },
{ {
'url': 'http://www.videoraj.ch/v/47f399fd8bb60', 'url': 'http://www.videoraj.to/v/47f399fd8bb60',
'md5': '7d0f8799d91efd4eda26587421c3c3b0', 'md5': '7d0f8799d91efd4eda26587421c3c3b0',
'info_dict': { 'info_dict': {
'id': '47f399fd8bb60', 'id': '47f399fd8bb60',


@ -1061,7 +1061,7 @@ class InfoExtractor(object):
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None, entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None, m3u8_id=None, note=None, errnote=None,
fatal=True): fatal=True, live=False):
formats = [{ formats = [{
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@ -1139,7 +1139,11 @@ class InfoExtractor(object):
if m3u8_id: if m3u8_id:
format_id.append(m3u8_id) format_id.append(m3u8_id)
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) # Bandwidth of live streams may differ over time thus making
# format_id unpredictable. So it's better to keep provided
# format_id intact.
if not live:
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
f = { f = {
'format_id': '-'.join(format_id), 'format_id': '-'.join(format_id),
'url': format_url(line.strip()), 'url': format_url(line.strip()),


@ -11,7 +11,6 @@ from math import pow, sqrt, floor
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_urllib_parse_unquote,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urllib_request, compat_urllib_request,
compat_urlparse, compat_urlparse,
@ -27,6 +26,7 @@ from ..utils import (
unified_strdate, unified_strdate,
urlencode_postdata, urlencode_postdata,
xpath_text, xpath_text,
extract_attributes,
) )
from ..aes import ( from ..aes import (
aes_cbc_decrypt, aes_cbc_decrypt,
@ -306,28 +306,36 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
'video_uploader', fatal=False) 'video_uploader', fatal=False)
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) available_fmts = []
playerdata_req = sanitized_Request(playerdata_url) for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
playerdata_req.data = urlencode_postdata({'current_page': webpage_url}) attrs = extract_attributes(a)
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') href = attrs.get('href')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') if href and '/freetrial' in href:
continue
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') available_fmts.append(fmt)
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) if not available_fmts:
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
available_fmts = re.findall(p, webpage)
if available_fmts:
break
video_encode_ids = []
formats = [] formats = []
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage): for fmt in available_fmts:
stream_quality, stream_format = self._FORMAT_IDS[fmt] stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p' video_format = fmt + 'p'
streamdata_req = sanitized_Request( streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (stream_id, stream_format, stream_quality), % (video_id, stream_format, stream_quality),
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata = self._download_xml( streamdata = self._download_xml(
streamdata_req, video_id, streamdata_req, video_id,
note='Downloading media info for %s' % video_format) note='Downloading media info for %s' % video_format)
stream_info = streamdata.find('./{default}preload/stream_info') stream_info = streamdata.find('./{default}preload/stream_info')
video_encode_id = xpath_text(stream_info, './video_encode_id')
if video_encode_id in video_encode_ids:
continue
video_encode_ids.append(video_encode_id)
video_url = xpath_text(stream_info, './host') video_url = xpath_text(stream_info, './host')
video_play_path = xpath_text(stream_info, './file') video_play_path = xpath_text(stream_info, './file')
if not video_url or not video_play_path: if not video_url or not video_play_path:
@ -359,6 +367,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'ext': 'flv', 'ext': 'flv',
}) })
formats.append(format_info) formats.append(format_info)
self._sort_formats(formats)
metadata = self._download_xml(
'http://www.crunchyroll.com/xml', video_id,
note='Downloading media info', query={
'req': 'RpcApiVideoPlayer_GetMediaMetadata',
'media_id': video_id,
})
subtitles = self.extract_subtitles(video_id, webpage) subtitles = self.extract_subtitles(video_id, webpage)
@ -366,9 +382,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'description': video_description, 'description': video_description,
'thumbnail': video_thumbnail, 'thumbnail': xpath_text(metadata, 'episode_image_url'),
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'series': xpath_text(metadata, 'series_title'),
'episode': xpath_text(metadata, 'episode_title'),
'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
} }
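The newly imported `extract_attributes` above is what turns each matched `<a ... token="showmedia.XXXXp" ...>` tag into a dict so that free-trial links can be skipped. A rough sketch of what it returns, with a made-up tag:

```python
from youtube_dl.utils import extract_attributes

a = '<a href="/freetrial/signup" token="showmedia.1080p" class="dark-button">'
attrs = extract_attributes(a)
print(attrs.get('href'))   # /freetrial/signup (would be skipped by the check above)
print(attrs.get('token'))  # showmedia.1080p
```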


@ -9,7 +9,7 @@ from ..utils import (
class CWTVIE(InfoExtractor): class CWTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
_TESTS = [{ _TESTS = [{
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
'info_dict': { 'info_dict': {
@ -48,6 +48,9 @@ class CWTVIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} }
}, {
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):


@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/', 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
# The md5 is different each time 'md5': 'ac0f98a52a330f700b4b3034ad240649',
'info_dict': { 'info_dict': {
'id': '11633', 'id': '11633',
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland', 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
'ext': 'flv', 'ext': 'mp4',
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland', 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
'upload_date': '20150714', 'upload_date': '20150714',
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
player_info = self._download_xml( player_info = self._download_xml(
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
display_id) display_id)
video_info = player_info.find('video') video_info = player_info.find('video')
stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
f4m_info = self._download_xml( formats = []
self._proto_relative_url(video_info.find('url').text.strip()), display_id) # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
token_el = f4m_info.find('token') for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' stream_access_info = self._download_xml(sa_url, display_id)
formats = self._extract_f4m_formats(manifest_url, display_id) token_el = stream_access_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
if '.f4m' in manifest_url:
formats.extend(self._extract_f4m_formats(
manifest_url + '&hdcore=3.2.0',
display_id, f4m_id='hds', fatal=False))
else:
formats.extend(self._extract_m3u8_formats(
manifest_url, display_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': video_info.find('title').text, 'title': video_info.find('title').text,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
'upload_date': unified_strdate(video_info.find('time_date').text), 'upload_date': unified_strdate(video_info.find('time_date').text),
'formats': formats, 'formats': formats,
} }


@ -400,7 +400,6 @@ from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE from .mailru import MailRuIE
from .makerschannel import MakersChannelIE from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE from .makertv import MakerTVIE
from .malemotion import MalemotionIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
from .mdr import MDRIE from .mdr import MDRIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
@ -439,7 +438,7 @@ from .mtv import (
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .musicplayon import MusicPlayOnIE from .musicplayon import MusicPlayOnIE
from .mwave import MwaveIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE from .myspass import MySpassIE
from .myvi import MyviIE from .myvi import MyviIE
@ -658,7 +657,6 @@ from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .sexykarma import SexyKarmaIE
from .shahid import ShahidIE from .shahid import ShahidIE
from .shared import SharedIE from .shared import SharedIE
from .sharesix import ShareSixIE from .sharesix import ShareSixIE
@ -675,10 +673,6 @@ from .smotri import (
SmotriUserIE, SmotriUserIE,
SmotriBroadcastIE, SmotriBroadcastIE,
) )
from .snagfilms import (
SnagFilmsIE,
SnagFilmsEmbedIE,
)
from .snotr import SnotrIE from .snotr import SnotrIE
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import ( from .soundcloud import (
@ -881,6 +875,10 @@ from .vidme import (
) )
from .vidzi import VidziIE from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
)
from .viewster import ViewsterIE from .viewster import ViewsterIE
from .viidea import ViideaIE from .viidea import ViideaIE
from .vimeo import ( from .vimeo import (
@ -919,6 +917,7 @@ from .vulture import VultureIE
from .walla import WallaIE from .walla import WallaIE
from .washingtonpost import WashingtonPostIE from .washingtonpost import WashingtonPostIE
from .wat import WatIE from .wat import WatIE
from .watchindianporn import WatchIndianPornIE
from .wdr import ( from .wdr import (
WDRIE, WDRIE,
WDRMobileIE, WDRMobileIE,


@ -2,6 +2,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse_unquote_plus,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor):
'description': 'md5:1769f43cd5fc130ace8fd87232207892', 'description': 'md5:1769f43cd5fc130ace8fd87232207892',
'thumbnail': 're:https?://.*\.jpg', 'thumbnail': 're:https?://.*\.jpg',
}, },
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
}, { }, {
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play', 'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
'info_dict': { 'info_dict': {
@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor):
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
'thumbnail': 're:https?://.*\.jpg', 'thumbnail': 're:https?://.*\.jpg',
}, },
'skip': 'Access without user interaction is forbidden by CloudFlare',
}, { }, {
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview', 'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
'info_dict': { 'info_dict': {
@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor):
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803', 'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
'thumbnail': 're:https?://.*\.(?:jpg|png)', 'thumbnail': 're:https?://.*\.(?:jpg|png)',
}, },
'skip': 'Access without user interaction is forbidden by CloudFlare',
}] }]
_LOGIN_URL = 'http://www.funimation.com/login'
def _download_webpage(self, *args, **kwargs):
try:
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
response = ee.cause.read()
if b'>Please complete the security check to access<' in response:
raise ExtractorError(
'Access to funimation.com is blocked by CloudFlare. '
'Please browse to http://www.funimation.com/, solve '
'the reCAPTCHA, export browser cookies to a text file,'
' and then try again with --cookies YOUR_COOKIE_FILE.',
expected=True)
raise
def _extract_cloudflare_session_ua(self, url):
ci_session_cookie = self._get_cookies(url).get('ci_session')
if ci_session_cookie:
ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
# ci_session is a string serialized by PHP function serialize()
# This case is simple enough to use regular expressions only
return self._search_regex(
r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
default=None)
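To make the `ci_session` comment above concrete: PHP's `serialize()` stores each string field as `s:<length>:"<value>";`, so a narrow regex is enough to pull the recorded user agent back out. A small sketch with a made-up, already URL-decoded cookie value:

```python
import re

ci_session = 'a:2:{s:10:"user_agent";s:26:"Mozilla/5.0 (X11; rv:42.0)";s:9:"last_page";s:1:"/";}'
print(re.search(r'"user_agent";s:\d+:"([^"]+)"', ci_session).group(1))
# Mozilla/5.0 (X11; rv:42.0)
```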
def _login(self): def _login(self):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None: if username is None:
@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor):
'email_field': username, 'email_field': username,
'password_field': password, 'password_field': password,
}) })
login_request = sanitized_Request('http://www.funimation.com/login', data, headers={ user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0', if not user_agent:
user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
login_request = sanitized_Request(self._LOGIN_URL, data, headers={
'User-Agent': user_agent,
'Content-Type': 'application/x-www-form-urlencoded' 'Content-Type': 'application/x-www-form-urlencoded'
}) })
login_page = self._download_webpage( login_page = self._download_webpage(
@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor):
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'), ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
) )
user_agent = self._extract_cloudflare_session_ua(url)
if user_agent:
USER_AGENTS = ((None, user_agent),)
for kind, user_agent in USER_AGENTS: for kind, user_agent in USER_AGENTS:
request = sanitized_Request(url) request = sanitized_Request(url)
request.add_header('User-Agent', user_agent) request.add_header('User-Agent', user_agent)
webpage = self._download_webpage( webpage = self._download_webpage(
request, display_id, 'Downloading %s webpage' % kind) request, display_id,
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
playlist = self._parse_json( playlist = self._parse_json(
self._search_regex( self._search_regex(


@ -51,7 +51,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
@ -1924,10 +1924,10 @@ class GenericIE(InfoExtractor):
if onionstudios_url: if onionstudios_url:
return self.url_result(onionstudios_url) return self.url_result(onionstudios_url)
# Look for SnagFilms embeds # Look for ViewLift embeds
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if snagfilms_url: if viewlift_url:
return self.url_result(snagfilms_url) return self.url_result(viewlift_url)
# Look for JWPlatform embeds # Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage) jwplatform_url = JWPlatformIE._extract_url(webpage)


@ -1,46 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
class MalemotionIE(InfoExtractor):
_VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
_TEST = {
'url': 'http://malemotion.com/video/bete-de-concours.ltc',
'md5': '3013e53a0afbde2878bc39998c33e8a5',
'info_dict': {
'id': 'ltc',
'ext': 'mp4',
'title': 'Bête de Concours',
'age_limit': 18,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = compat_urllib_parse_unquote(self._search_regex(
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
video_title = self._html_search_regex(
r'<title>(.*?)</title', webpage, 'title')
video_thumbnail = self._search_regex(
r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
formats = [{
'url': video_url,
'ext': 'mp4',
'format_id': 'mp4',
'preference': 1,
}]
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'title': video_title,
'thumbnail': video_thumbnail,
'age_limit': 18,
}


@ -10,6 +10,7 @@ from ..utils import (
class MwaveIE(InfoExtractor): class MwaveIE(InfoExtractor):
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
_URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s'
_TEST = { _TEST = {
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
# md5 is unstable # md5 is unstable
@ -56,3 +57,28 @@ class MwaveIE(InfoExtractor):
'view_count': int_or_none(vod_info.get('hit')), 'view_count': int_or_none(vod_info.get('hit')),
'formats': formats, 'formats': formats,
} }
class MwaveMeetGreetIE(InfoExtractor):
_VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)'
_TEST = {
'url': 'http://mwave.interest.me/meetgreet/view/256',
'info_dict': {
'id': '173294',
'ext': 'flv',
'title': '[MEET&GREET] Park BoRam',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'Mwave',
'duration': 3634,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
clip_id = self._html_search_regex(
r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)',
webpage, 'clip ID')
clip_url = MwaveIE._URL_TEMPLATE % clip_id
return self.url_result(clip_url, 'Mwave', clip_id)


@ -5,8 +5,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
sanitized_Request,
unified_strdate,
) )
@ -20,7 +18,6 @@ class NuvidIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Horny babes show their awesome bodeis and', 'title': 'Horny babes show their awesome bodeis and',
'duration': 129, 'duration': 129,
'upload_date': '20140508',
'age_limit': 18, 'age_limit': 18,
} }
} }
@ -28,28 +25,31 @@ class NuvidIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
formats = [] page_url = 'http://m.nuvid.com/video/%s' % video_id
webpage = self._download_webpage(
page_url, video_id, 'Downloading video page')
# When dwnld_speed exists and has a value larger than the MP4 file's
# bitrate, Nuvid returns the MP4 URL
# Its unit is 100 bytes/millisecond, see mobile-nuvid-min.js for the algorithm
self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
mp4_webpage = self._download_webpage(
page_url, video_id, 'Downloading video page for MP4 format')
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]: html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
request = sanitized_Request( video_url = self._html_search_regex(html5_video_re, webpage, video_id)
'http://m.nuvid.com/play/%s' % video_id) mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed) formats = [{
webpage = self._download_webpage( 'url': video_url,
request, video_id, 'Downloading %s page' % format_id) }]
video_url = self._html_search_regex( if mp4_video_url != video_url:
r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
if not video_url:
continue
formats.append({ formats.append({
'url': video_url, 'url': mp4_video_url,
'format_id': format_id,
}) })
webpage = self._download_webpage(
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
title = self._html_search_regex( title = self._html_search_regex(
[r'<span title="([^"]+)">', [r'<span title="([^"]+)">',
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip() r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
thumbnails = [ thumbnails = [
{ {
'url': thumb_url, 'url': thumb_url,
@ -57,9 +57,8 @@ class NuvidIE(InfoExtractor):
] ]
thumbnail = thumbnails[0]['url'] if thumbnails else None thumbnail = thumbnails[0]['url'] if thumbnails else None
duration = parse_duration(self._html_search_regex( duration = parse_duration(self._html_search_regex(
r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False)) [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
upload_date = unified_strdate(self._html_search_regex( r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
return { return {
'id': video_id, 'id': video_id,
@ -67,7 +66,6 @@ class NuvidIE(InfoExtractor):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'upload_date': upload_date,
'age_limit': 18, 'age_limit': 18,
'formats': formats, 'formats': formats,
} }
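As a sanity check on the `dwnld_speed` cookie set above, and assuming the 100 bytes/millisecond unit stated in the comment, the value 10.0 advertises a connection comfortably faster than a typical MP4 bitrate, which is why the MP4 URL is returned:

```python
dwnld_speed = 10.0                           # cookie value set by the extractor above
bytes_per_second = dwnld_speed * 100 * 1000  # 100 bytes per millisecond
print(bytes_per_second * 8 / 1e6)            # 8.0 Mbit/s
```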


@ -96,6 +96,8 @@ class OoyalaIE(OoyalaBaseIE):
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
'duration': 853.386, 'duration': 853.386,
}, },
# The video in the original webpage now uses PlayWire
'skip': 'Ooyala said: movie expired',
}, { }, {
# Only available for ipad # Only available for ipad
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',


@ -185,6 +185,7 @@ class ORFFM4IE(InfoExtractor):
'timestamp': 1452456073, 'timestamp': 1452456073,
'upload_date': '20160110', 'upload_date': '20160110',
}, },
'skip': 'Live streams on FM4 got deleted soon',
} }
def _real_extract(self, url): def _real_extract(self, url):


@ -196,7 +196,7 @@ class PBSIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
'md5': 'ce1888486f0908d555a8093cac9a7362', 'md5': '173dc391afd361fa72eab5d3d918968d',
'info_dict': { 'info_dict': {
'id': '2365006249', 'id': '2365006249',
'ext': 'mp4', 'ext': 'mp4',
@ -204,13 +204,10 @@ class PBSIE(InfoExtractor):
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071', 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
'duration': 3190, 'duration': 3190,
}, },
'params': {
'skip_download': True, # requires ffmpeg
},
}, },
{ {
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/', 'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
'md5': '143c98aa54a346738a3d78f54c925321', 'md5': '6f722cb3c3982186d34b0f13374499c7',
'info_dict': { 'info_dict': {
'id': '2365297690', 'id': '2365297690',
'ext': 'mp4', 'ext': 'mp4',
@ -218,9 +215,6 @@ class PBSIE(InfoExtractor):
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9', 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
'duration': 5050, 'duration': 5050,
}, },
'params': {
'skip_download': True, # requires ffmpeg
}
}, },
{ {
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/', 'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
@ -244,9 +238,6 @@ class PBSIE(InfoExtractor):
'duration': 6559, 'duration': 6559,
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
'params': {
'skip_download': True, # requires ffmpeg
},
}, },
{ {
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html', 'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
@ -262,9 +253,6 @@ class PBSIE(InfoExtractor):
'upload_date': '20140122', 'upload_date': '20140122',
'age_limit': 10, 'age_limit': 10,
}, },
'params': {
'skip_download': True, # requires ffmpeg
},
}, },
{ {
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/', 'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
@ -290,6 +278,7 @@ class PBSIE(InfoExtractor):
}, },
{ {
'url': 'http://www.pbs.org/video/2365245528/', 'url': 'http://www.pbs.org/video/2365245528/',
'md5': '115223d41bd55cda8ae5cd5ed4e11497',
'info_dict': { 'info_dict': {
'id': '2365245528', 'id': '2365245528',
'display_id': '2365245528', 'display_id': '2365245528',
@ -299,15 +288,13 @@ class PBSIE(InfoExtractor):
'duration': 6851, 'duration': 6851,
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
'params': {
'skip_download': True, # requires ffmpeg
},
}, },
{ {
# Video embedded in iframe containing angle brackets as attribute's value (e.g. # Video embedded in iframe containing angle brackets as attribute's value (e.g.
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
# https://github.com/rg3/youtube-dl/issues/7059) # https://github.com/rg3/youtube-dl/issues/7059)
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/', 'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
'md5': '84ced42850d78f1d4650297356e95e6f',
'info_dict': { 'info_dict': {
'id': '2365546844', 'id': '2365546844',
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
@ -317,9 +304,6 @@ class PBSIE(InfoExtractor):
'duration': 1480, 'duration': 1480,
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
'params': {
'skip_download': True, # requires ffmpeg
},
}, },
{ {
# Frontline video embedded via flp2012.js # Frontline video embedded via flp2012.js
@ -340,6 +324,7 @@ class PBSIE(InfoExtractor):
{ {
# Serves HD only via widget/partnerplayer page
'url': 'http://www.pbs.org/video/2365641075/', 'url': 'http://www.pbs.org/video/2365641075/',
'md5': 'acfd4c400b48149a44861cb16dd305cf',
'info_dict': { 'info_dict': {
'id': '2365641075', 'id': '2365641075',
'ext': 'mp4', 'ext': 'mp4',
@ -348,9 +333,6 @@ class PBSIE(InfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'formats': 'mincount:8', 'formats': 'mincount:8',
}, },
'params': {
'skip_download': True, # requires ffmpeg
},
}, },
{ {
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
@ -494,6 +476,7 @@ class PBSIE(InfoExtractor):
info = video_info info = video_info
formats = [] formats = []
http_url = None
for num, redirect in enumerate(redirects): for num, redirect in enumerate(redirects):
redirect_id = redirect.get('eeid') redirect_id = redirect.get('eeid')
@ -514,13 +497,34 @@ class PBSIE(InfoExtractor):
if determine_ext(format_url) == 'm3u8': if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4', preference=1, m3u8_id='hls')) format_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
else: else:
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format_id': redirect_id, 'format_id': redirect_id,
}) })
if re.search(r'^https?://.*(?:\d+k|baseline)', format_url):
http_url = format_url
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
formats))
if http_url:
for m3u8_format in m3u8_formats:
bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
# extract only the formats that we know will be available as http formats.
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
if not bitrate or bitrate not in ('192k', '400k', '800k', '1200k', '2500k'):
continue
if bitrate == '192k':
bitrate = 'baseline'
f = m3u8_format.copy()
f.update({
'url': re.sub(r'\d+k|baseline', bitrate, http_url),
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
rating_str = info.get('rating') rating_str = info.get('rating')
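The HTTP-format derivation above works by swapping the bitrate token inside a previously captured progressive URL. The `re.sub` step in isolation, with a hypothetical COVE-style URL:

```python
import re

# Hypothetical progressive URL of the kind captured as http_url above
http_url = 'http://ga.video.cdn.pbs.org/videos/example/episode-800k.mp4'
for bitrate in ('1200k', '2500k'):
    print(re.sub(r'\d+k|baseline', bitrate, http_url))
# prints the same URL with 800k replaced by 1200k and then by 2500k
```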


@ -20,18 +20,19 @@ class RtlNlIE(InfoExtractor):
(?P<id>[0-9a-f-]+)''' (?P<id>[0-9a-f-]+)'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
'md5': 'cc16baa36a6c169391f0764fa6b16654', 'md5': '473d1946c1fdd050b2c0161a4b13c373',
'info_dict': { 'info_dict': {
'id': '6e4203a6-0a5e-3596-8424-c599a59e0677', 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
'ext': 'mp4', 'ext': 'mp4',
'title': 'RTL Nieuws - Laat', 'title': 'RTL Nieuws',
'description': 'md5:6b61f66510c8889923b11f2778c72dc5', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'timestamp': 1408051800, 'timestamp': 1461951000,
'upload_date': '20140814', 'upload_date': '20160429',
'duration': 576.880, 'duration': 1167.96,
}, },
}, { }, {
# best format available a3t
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
'md5': 'dea7474214af1271d91ef332fb8be7ea', 'md5': 'dea7474214af1271d91ef332fb8be7ea',
'info_dict': { 'info_dict': {
@ -39,18 +40,19 @@ class RtlNlIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1424039400, 'timestamp': 1424039400,
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', 'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
'upload_date': '20150215', 'upload_date': '20150215',
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
} }
}, { }, {
# empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275) # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
# best format available nettv
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false', 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
'info_dict': { 'info_dict': {
'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
'ext': 'mp4', 'ext': 'mp4',
'title': 'RTL Nieuws - Meer beelden van overval juwelier', 'title': 'RTL Nieuws - Meer beelden van overval juwelier',
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', 'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
'timestamp': 1437233400, 'timestamp': 1437233400,
'upload_date': '20150718', 'upload_date': '20150718',
'duration': 30.474, 'duration': 30.474,
@ -94,22 +96,44 @@ class RtlNlIE(InfoExtractor):
videopath = material['videopath'] videopath = material['videopath']
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4') formats = self._extract_m3u8_formats(
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
video_urlpart = videopath.split('/adaptive/')[1][:-5] video_urlpart = videopath.split('/adaptive/')[1][:-5]
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4' PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
formats.extend([ PG_FORMATS = (
{ ('a2t', 512, 288),
'url': PG_URL_TEMPLATE % ('a2m', video_urlpart), ('a3t', 704, 400),
'format_id': 'pg-sd', ('nettv', 1280, 720),
}, )
{
'url': PG_URL_TEMPLATE % ('a3m', video_urlpart), def pg_format(format_id, width, height):
'format_id': 'pg-hd', return {
'quality': 0, 'url': PG_URL_TEMPLATE % (format_id, video_urlpart),
'format_id': 'pg-%s' % format_id,
'protocol': 'http',
'width': width,
'height': height,
} }
])
if not formats:
formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS]
else:
pg_formats = []
for format_id, width, height in PG_FORMATS:
try:
# Find hls format with the same width and height corresponding
# to progressive format and copy metadata from it.
f = next(f for f in formats
if f.get('width') == width and f.get('height') == height).copy()
f.update(pg_format(format_id, width, height))
pg_formats.append(f)
except StopIteration:
# Missing hls format does mean that no progressive format with
# such width and height exists either.
pass
formats.extend(pg_formats)
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [] thumbnails = []
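The progressive-format block above relies on a small lookup-and-copy idiom: find the HLS rendition with the same frame size, clone its metadata, then overwrite the URL-related fields. Stripped of extractor context, with made-up format dicts:

```python
PG_FORMATS = (('a2t', 512, 288), ('a3t', 704, 400), ('nettv', 1280, 720))
formats = [{'format_id': 'hls-0', 'width': 704, 'height': 400, 'fps': 25, 'tbr': 1200}]

pg_formats = []
for format_id, width, height in PG_FORMATS:
    try:
        # clone the matching HLS format so fps/tbr metadata carries over
        f = next(f for f in formats
                 if f.get('width') == width and f.get('height') == height).copy()
        f.update({'format_id': 'pg-%s' % format_id, 'protocol': 'http'})
        pg_formats.append(f)
    except StopIteration:
        # no HLS rendition of this size, so no progressive counterpart either
        pass
print(pg_formats)  # a single derived 704x400 'pg-a3t' entry
```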


@ -18,6 +18,7 @@ class SciVeeIE(InfoExtractor):
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting', 'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
'description': 'md5:81f1710638e11a481358fab1b11059d7', 'description': 'md5:81f1710638e11a481358fab1b11059d7',
}, },
'skip': 'Not accessible from Travis CI server',
} }
def _real_extract(self, url): def _real_extract(self, url):


@ -159,11 +159,11 @@ class ThePlatformIE(ThePlatformBaseIE):
def str_to_hex(str): def str_to_hex(str):
return binascii.b2a_hex(str.encode('ascii')).decode('ascii') return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
def hex_to_str(hex): def hex_to_bytes(hex):
return binascii.a2b_hex(hex) return binascii.a2b_hex(hex.encode('ascii'))
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret) sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig) return '%s&sig=%s' % (url, sig)


@ -32,7 +32,22 @@ class TwentyMinutenIE(InfoExtractor):
'title': '«Wir müssen mutig nach vorne schauen»', 'title': '«Wir müssen mutig nach vorne schauen»',
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
} },
'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
}, {
# YouTube embed
'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
'info_dict': {
'id': 'ivM7A7SpDOs',
'ext': 'mp4',
'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
'upload_date': '20160424',
'uploader': 'RTVCM Castilla-La Mancha',
'uploader_id': 'RTVCM',
},
'add_ie': ['Youtube'],
}, { }, {
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
'only_matching': True, 'only_matching': True,
@ -48,6 +63,12 @@ class TwentyMinutenIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
webpage, 'YouTube embed URL', default=None)
if youtube_url is not None:
return self.url_result(youtube_url, 'Youtube')
title = self._html_search_regex( title = self._html_search_regex(
r'<h1>.*?<span>(.+?)</span></h1>', r'<h1>.*?<span>(.+?)</span></h1>',
webpage, 'title', default=None) webpage, 'title', default=None)


@ -13,8 +13,12 @@ from ..utils import (
) )
class SnagFilmsEmbedIE(InfoExtractor): class ViewLiftBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' _DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500', 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93', 'md5': '2924e9215c6eff7a55ed35b72276bd93',
@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
webpage) webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
@ -55,6 +59,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
'Film %s is not playable in your area.' % video_id, expected=True) 'Film %s is not playable in your area.' % video_id, expected=True)
formats = [] formats = []
has_bitrate = False
for source in self._parse_json(js_to_json(self._search_regex( for source in self._parse_json(js_to_json(self._search_regex(
r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id): r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
file_ = source.get('file') file_ = source.get('file')
@ -63,22 +68,25 @@ class SnagFilmsEmbedIE(InfoExtractor):
type_ = source.get('type') type_ = source.get('type')
ext = determine_ext(file_) ext = determine_ext(file_)
format_id = source.get('label') or ext format_id = source.get('label') or ext
if all(v == 'm3u8' for v in (type_, ext)): if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls')) file_, video_id, 'mp4', m3u8_id='hls'))
else: else:
bitrate = int_or_none(self._search_regex( bitrate = int_or_none(self._search_regex(
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext], [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
file_, 'bitrate', default=None)) file_, 'bitrate', default=None))
if not has_bitrate and bitrate:
has_bitrate = True
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)) r'^(\d+)[pP]$', format_id, 'height', default=None))
formats.append({ formats.append({
'url': file_, 'url': file_,
'format_id': format_id, 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
'tbr': bitrate, 'tbr': bitrate,
'height': height, 'height': height,
}) })
self._sort_formats(formats) field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
self._sort_formats(formats, field_preference)
title = self._search_regex( title = self._search_regex(
[r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'], [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
@ -91,8 +99,8 @@ class SnagFilmsEmbedIE(InfoExtractor):
} }
class SnagFilmsIE(InfoExtractor): class ViewLiftIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942', 'md5': '19844f897b35af219773fd63bdec2942',
@ -127,10 +135,16 @@ class SnagFilmsIE(InfoExtractor):
# Film is not available. # Film is not available.
'url': 'http://www.snagfilms.com/show/augie_alone/flirting', 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.winnersview.com/videos/the-good-son',
'only_matching': True,
}, {
'url': 'http://www.kesari.tv/news/video/1461919076414',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) domain, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@ -170,7 +184,7 @@ class SnagFilmsIE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id, 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
'id': film_id, 'id': film_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -178,4 +192,5 @@ class SnagFilmsIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'categories': categories, 'categories': categories,
'ie_key': 'ViewLiftEmbed',
} }


@ -6,7 +6,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_parse,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..utils import ( from ..utils import (
@ -78,11 +77,11 @@ class ViewsterIE(InfoExtractor):
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True): def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
request = sanitized_Request(url) request = sanitized_Request(url)
request.add_header('Accept', self._ACCEPT_HEADER) request.add_header('Accept', self._ACCEPT_HEADER)
request.add_header('Auth-token', self._AUTH_TOKEN) request.add_header('Auth-token', self._AUTH_TOKEN)
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal) return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -117,57 +116,85 @@ class ViewsterIE(InfoExtractor):
return self.playlist_result(entries, video_id, title, description) return self.playlist_result(entries, video_id, title, description)
formats = [] formats = []
manifest_url = None for language_set in info.get('LanguageSets', []):
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): manifest_url = None
media = self._download_json( m3u8_formats = []
'https://public-api.viewster.com/movies/%s/video?mediaType=%s' audio = language_set.get('Audio') or ''
% (entry_id, compat_urllib_parse.quote(media_type)), subtitle = language_set.get('Subtitle') or ''
video_id, 'Downloading %s JSON' % media_type, fatal=False) base_format_id = audio
if not media: if subtitle:
continue base_format_id += '-%s' % subtitle
video_url = media.get('Uri')
if not video_url:
continue
ext = determine_ext(video_url)
if ext == 'f4m':
manifest_url = video_url
video_url += '&' if '?' in video_url else '?'
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds'))
elif ext == 'm3u8':
manifest_url = video_url
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls',
fatal=False) # m3u8 sometimes fail
if m3u8_formats:
formats.extend(m3u8_formats)
else:
qualities_basename = self._search_regex(
'/([^/]+)\.csmil/',
manifest_url, 'qualities basename', default=None)
if not qualities_basename:
continue
QUALITIES_RE = r'((,\d+k)+,?)'
qualities = self._search_regex(
QUALITIES_RE, qualities_basename,
'qualities', default=None)
if not qualities:
continue
qualities = qualities.strip(',').split(',')
http_template = re.sub(QUALITIES_RE, r'%s', qualities_basename)
http_url_basename = url_basename(video_url)
for q in qualities:
tbr = int_or_none(self._search_regex(
r'(\d+)k', q, 'bitrate', default=None))
formats.append({
'url': video_url.replace(http_url_basename, http_template % q),
'ext': 'mp4',
'format_id': 'http' + ('-%d' % tbr if tbr else ''),
'tbr': tbr,
})
if not formats and not info.get('LanguageSets') and not info.get('VODSettings'): def concat(suffix, sep='-'):
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
media = self._download_json(
'https://public-api.viewster.com/movies/%s/video' % entry_id,
video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
'mediaType': media_type,
'language': audio,
'subtitle': subtitle,
})
if not media:
continue
video_url = media.get('Uri')
if not video_url:
continue
ext = determine_ext(video_url)
if ext == 'f4m':
manifest_url = video_url
video_url += '&' if '?' in video_url else '?'
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=concat('hds')))
elif ext == 'm3u8':
manifest_url = video_url
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id=concat('hls'),
fatal=False) # m3u8 sometimes fail
if m3u8_formats:
formats.extend(m3u8_formats)
else:
qualities_basename = self._search_regex(
'/([^/]+)\.csmil/',
manifest_url, 'qualities basename', default=None)
if not qualities_basename:
continue
QUALITIES_RE = r'((,\d+k)+,?)'
qualities = self._search_regex(
QUALITIES_RE, qualities_basename,
'qualities', default=None)
if not qualities:
continue
qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
qualities.sort()
http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
http_url_basename = url_basename(video_url)
if m3u8_formats:
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
if len(qualities) == len(m3u8_formats):
for q, m3u8_format in zip(qualities, m3u8_formats):
f = m3u8_format.copy()
f.update({
'url': video_url.replace(http_url_basename, http_template % q),
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
for q in qualities:
formats.append({
'url': video_url.replace(http_url_basename, http_template % q),
'ext': 'mp4',
'format_id': 'http-%d' % q,
'tbr': q,
})
if not formats and not info.get('VODSettings'):
self.raise_geo_restricted() self.raise_geo_restricted()
self._sort_formats(formats) self._sort_formats(formats)
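The HTTP fallback above is driven by the quality list embedded in the `.csmil` basename of the manifest URL. The string handling in isolation, with a made-up basename:

```python
import re

QUALITIES_RE = r'((,\d+k)+,?)'
qualities_basename = 'movie_,300k,600k,1200k,.mp4.csmil'  # hypothetical

qualities = re.search(QUALITIES_RE, qualities_basename).group(1)
qualities = sorted(int(q[:-1]) for q in qualities.strip(',').split(','))
http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
print(qualities)      # [300, 600, 1200]
print(http_template)  # movie_%dk.mp4.csmil
print([http_template % q for q in qualities])
```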


@@ -1,9 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
from __future__ import division, unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import (
dict_get,
ExtractorError,
float_or_none,
int_or_none,
)
@@ -31,16 +35,76 @@ class VLiveIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s' % video_id, video_id)
long_video_id = self._search_regex(
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
webpage, 'long video id')
key = self._search_regex(
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
webpage, 'key')
# UTC+x - UTC+9 (KST)
tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
tz_offset = -tz // 60 - 9 * 60
self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset)
status_params = self._download_json(
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
video_id, 'Downloading JSON status',
headers={'Referer': url})
status = status_params.get('status')
air_start = status_params.get('onAirStartAt', '')
is_live = status_params.get('isLive')
video_params = self._search_regex(
r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)',
webpage, 'video params')
live_params, long_video_id, key = re.split(
r'"\s*,\s*"', video_params)[1:4]
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
live_params = self._parse_json('"%s"' % live_params, video_id)
live_params = self._parse_json(live_params, video_id)
return self._live(video_id, webpage, live_params)
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
if long_video_id and key:
return self._replay(video_id, webpage, long_video_id, key)
elif is_live:
status = 'LIVE_END'
else:
status = 'COMING_SOON'
if status == 'LIVE_END':
raise ExtractorError('Uploading for replay. Please wait...',
expected=True)
elif status == 'COMING_SOON':
raise ExtractorError('Coming soon! %s' % air_start, expected=True)
elif status == 'CANCELED':
raise ExtractorError('We are sorry, '
'but the live broadcast has been canceled.',
expected=True)
else:
raise ExtractorError('Unknown status %s' % status)
def _get_common_fields(self, webpage):
title = self._og_search_title(webpage)
creator = self._html_search_regex(
r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
webpage, 'creator', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
return {
'title': title,
'creator': creator,
'thumbnail': thumbnail,
}
def _live(self, video_id, webpage, live_params):
formats = []
for vid in live_params.get('resolutions', []):
formats.extend(self._extract_m3u8_formats(
vid['cdnUrl'], video_id, 'mp4',
m3u8_id=vid.get('name'),
fatal=False, live=True))
self._sort_formats(formats)
return dict(self._get_common_fields(webpage),
id=video_id,
formats=formats,
is_live=True)
def _replay(self, video_id, webpage, long_video_id, key):
playinfo = self._download_json(
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
% compat_urllib_parse_urlencode({
@@ -62,11 +126,6 @@ class VLiveIE(InfoExtractor):
} for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
creator = self._html_search_regex(
r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
webpage, 'creator', fatal=False)
view_count = int_or_none(playinfo.get('meta', {}).get('count'))
subtitles = {}
@@ -77,12 +136,8 @@ class VLiveIE(InfoExtractor):
'ext': 'vtt',
'url': caption['source']}]
return {
'id': video_id,
'title': title,
'creator': creator,
'thumbnail': thumbnail,
'view_count': view_count,
'formats': formats,
'subtitles': subtitles,
}
return dict(self._get_common_fields(webpage),
id=video_id,
formats=formats,
view_count=view_count,
subtitles=subtitles)
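On the `timezoneOffset` cookie set above: `time.timezone`/`time.altzone` give the local offset in seconds *west* of UTC, so negating and integer-dividing by 60 yields minutes east of UTC, and subtracting `9 * 60` re-expresses that relative to KST (UTC+9) — which is what the cookie appears to expect. A small sketch of the same arithmetic with a few illustrative offsets:

```python
import time

def kst_relative_minutes(seconds_west_of_utc):
    # time.timezone / time.altzone are positive west of UTC, negative east of it.
    minutes_east_of_utc = -seconds_west_of_utc // 60
    return minutes_east_of_utc - 9 * 60   # minutes relative to UTC+9 (KST)

# Same DST-aware selection as in the extractor.
tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
print(kst_relative_minutes(tz))

print(kst_relative_minutes(-2 * 3600))   # UTC+2 -> -420
print(kst_relative_minutes(5 * 3600))    # UTC-5 -> -840
print(kst_relative_minutes(-9 * 3600))   # UTC+9 -> 0
```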

View File

@@ -11,61 +11,27 @@ from ..utils import (
)
class SexyKarmaIE(InfoExtractor):
IE_DESC = 'Sexy Karma and Watch Indian Porn'
_VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
_TESTS = [{
'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
'info_dict': {
'id': 'yHI70cOyIHt',
'display_id': 'taking-a-quick-pee',
'ext': 'mp4',
'title': 'Taking a quick pee.',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'wildginger7',
'upload_date': '20141008',
'duration': 22,
'view_count': int,
'comment_count': int,
'categories': list,
'age_limit': 18,
}
}, {
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
'md5': 'dd216c68d29b49b12842b9babe762a5d',
'info_dict': {
'id': '8Id6EZPbuHf',
'display_id': 'pot-pixie-tribute',
'ext': 'mp4',
'title': 'pot_pixie tribute',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'banffite',
'upload_date': '20141013',
'duration': 16,
'view_count': int,
'comment_count': int,
'categories': list,
'age_limit': 18,
}
}, {
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
'md5': '9afb80675550406ed9a63ac2819ef69d',
'info_dict': {
'id': 'dW2mtctxJfs',
'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
'ext': 'mp4',
'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'Don',
'upload_date': '20140213',
'duration': 83,
'view_count': int,
'comment_count': int,
'categories': list,
'age_limit': 18,
}
}]
class WatchIndianPornIE(InfoExtractor):
IE_DESC = 'Watch Indian Porn'
_VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
_TEST = {
'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html',
'md5': '249589a164dde236ec65832bfce17440',
'info_dict': {
'id': 'RZa2avywNPa',
'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera',
'ext': 'mp4',
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'LoveJay',
'upload_date': '20160428',
'duration': 226,
'view_count': int,
'comment_count': int,
'categories': list,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -109,6 +75,9 @@ class SexyKarmaIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'url': video_url,
'http_headers': {
'Referer': url,
},
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
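Apart from the rename and the refreshed test, the functional change here is the `http_headers` entry, which asks the downloader to send the page URL as `Referer` when fetching the media file; some hosts reject requests without it. A rough standalone illustration of the same idea with plain `urllib` (both URLs are placeholders):

```python
from urllib.request import Request

page_url = 'http://www.example.com/video/some-clip-abc123.html'   # placeholder
video_url = 'http://media.example.com/files/abc123.mp4'           # placeholder

# Hosts that validate the Referer typically answer 403 when it is missing.
request = Request(video_url, headers={'Referer': page_url})
print(request.get_header('Referer'))   # http://www.example.com/video/some-clip-abc123.html

# With real URLs the download itself would then be:
# from urllib.request import urlopen
# data = urlopen(request).read()
```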

View File

@@ -4,16 +4,22 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
unified_strdate,
)
class WSJIE(InfoExtractor):
_VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
_VALID_URL = r'''(?x)https?://
(?:
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
(?:www\.)?wsj\.com/video/[^/]+/
)
(?P<id>[a-zA-Z0-9-]+)'''
IE_DESC = 'Wall Street Journal'
_TEST = {
_TESTS = [{
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
'md5': 'e230a5bb249075e40793b655a54a02e4',
'info_dict': {
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'ext': 'mp4',
@@ -24,65 +30,60 @@ class WSJIE(InfoExtractor):
'duration': 90,
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
},
}
}, {
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
bitrates = [128, 174, 264, 320, 464, 664, 1264]
api_url = (
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
'type=guid&count=1&query=%s&'
'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
'author,description,name,linkURL,videoStillURL,duration,videoURL,'
'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
'allthingsd-subsection,sm-section,sm-subsection,provider,'
'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
'emailURL,emailPartnerID,showName,omnitureProgramName,'
'omnitureVideoFormat,linkRelativeURL,touchCastID,'
'omniturePublishDate,%s') % (
video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
api_url = (
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
'thumbnailList,author,description,name,duration,videoURL,'
'titletag,formattedCreationDate,keywords,editor' % video_id)
info = self._download_json(api_url, video_id)['items'][0]
# Thumbnails are conveniently in the correct format already
thumbnails = info.get('thumbnailList')
creator = info.get('author')
uploader_id = info.get('editor')
categories = info.get('keywords')
duration = int_or_none(info.get('duration'))
upload_date = unified_strdate(
info.get('formattedCreationDate'), day_first=False)
title = info.get('name', info.get('titletag'))
formats = [{
'format_id': 'f4m',
'format_note': 'f4m (meta URL)',
'url': info['videoURL'],
}]
if info.get('hls'):
formats.extend(self._extract_m3u8_formats(
info['hls'], video_id, ext='mp4',
preference=0, entry_protocol='m3u8_native'))
for br in bitrates:
field = 'video%dkMP4Url' % br
if info.get(field):
formats.append({
'format_id': 'mp4-%d' % br,
'container': 'mp4',
'tbr': br,
'url': info[field],
})
formats = []
f4m_url = info.get('videoURL')
if f4m_url:
formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False))
m3u8_url = info.get('hls')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
info['hls'], video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
for v in info.get('videoMP4List', []):
mp4_url = v.get('url')
if not mp4_url:
continue
tbr = int_or_none(v.get('bitrate'))
formats.append({
'url': mp4_url,
'format_id': 'http' + ('-%d' % tbr if tbr else ''),
'tbr': tbr,
'width': int_or_none(v.get('width')),
'height': int_or_none(v.get('height')),
'fps': float_or_none(v.get('fps')),
})
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'thumbnails': thumbnails,
'creator': creator,
'uploader_id': uploader_id,
'duration': duration,
'upload_date': upload_date,
'title': title,
'categories': categories,
}
return {
'id': video_id,
'formats': formats,
# Thumbnails are conveniently in the correct format already
'thumbnails': info.get('thumbnailList'),
'creator': info.get('author'),
'uploader_id': info.get('editor'),
'duration': int_or_none(info.get('duration')),
'upload_date': unified_strdate(info.get(
'formattedCreationDate'), day_first=False),
'title': title,
'categories': info.get('keywords'),
}
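The rewritten MP4 handling above leans on `int_or_none`/`float_or_none` so that missing or malformed fields in a `videoMP4List` entry become `None` rather than raising. Below is a self-contained sketch of that pattern; the helper implementations are simplified stand-ins for the youtube_dl.utils versions and the sample entry is invented, since the exact API payload is not shown here.

```python
def int_or_none(value):
    # Simplified stand-in for youtube_dl.utils.int_or_none
    try:
        return int(value)
    except (TypeError, ValueError):
        return None

def float_or_none(value):
    # Simplified stand-in for youtube_dl.utils.float_or_none
    try:
        return float(value)
    except (TypeError, ValueError):
        return None

# Invented example of one videoMP4List entry; the real API fields may differ.
entry = {'url': 'http://example.com/clip-664k.mp4', 'bitrate': '664',
         'width': 1280, 'height': 720, 'fps': None}

tbr = int_or_none(entry.get('bitrate'))
fmt = {
    'url': entry['url'],
    'format_id': 'http' + ('-%d' % tbr if tbr else ''),
    'tbr': tbr,
    'width': int_or_none(entry.get('width')),
    'height': int_or_none(entry.get('height')),
    'fps': float_or_none(entry.get('fps')),
}
print(fmt)   # format_id becomes 'http-664', fps stays None
```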

View File

@@ -18,9 +18,23 @@ from ..utils import (
class YandexMusicBaseIE(InfoExtractor):
@staticmethod
def _handle_error(response):
error = response.get('error')
if error:
raise ExtractorError(error, expected=True)
if isinstance(response, dict):
error = response.get('error')
if error:
raise ExtractorError(error, expected=True)
def _download_webpage(self, *args, **kwargs):
webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
raise ExtractorError(
'YandexMusic has considered youtube-dl requests automated and '
'asks you to solve a CAPTCHA. You can either wait for some '
'time until unblocked and optionally use --sleep-interval '
'in future or alternatively you can go to https://music.yandex.ru/ '
'solve CAPTCHA, then export cookies and pass cookie file to '
'youtube-dl with --cookies',
expected=True)
return webpage
def _download_json(self, *args, **kwargs):
response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
@@ -47,7 +61,8 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
'album_artist': 'Carlo Ambrosio',
'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio',
'release_year': '2009',
}
},
'skip': 'Travis CI servers blocked by YandexMusic',
}
def _get_track_url(self, storage_dir, track_id):
@@ -139,6 +154,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
},
'playlist_count': 50,
'skip': 'Travis CI servers blocked by YandexMusic',
}
def _real_extract(self, url):
@@ -171,6 +187,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
},
'playlist_count': 6,
'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
# https://github.com/rg3/youtube-dl/issues/6666)
@@ -180,6 +197,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'title': 'Музыка 90-х',
},
'playlist_count': 310,
'skip': 'Travis CI servers blocked by YandexMusic',
}]
def _real_extract(self, url):
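The `_download_webpage` override above is the usual "detect the block page centrally and fail with instructions" pattern: every page fetch goes through one method that checks for the CAPTCHA marker and raises a descriptive error instead of letting extraction die later on an unrelated regex. A stripped-down sketch of the pattern — the classes and marker string here are illustrative, not the real youtube-dl ones:

```python
class BlockedError(Exception):
    pass


class BaseClient(object):
    def fetch(self, url):
        # Stand-in for a real HTTP request.
        return '<html>requests from your IP address look automated</html>'


class GuardedClient(BaseClient):
    BLOCK_MARKER = 'look automated'   # illustrative marker, not the real string

    def fetch(self, url):
        page = super(GuardedClient, self).fetch(url)
        if self.BLOCK_MARKER in page:
            raise BlockedError(
                'Service is serving a CAPTCHA: wait, slow down requests, '
                'or reuse cookies from a solved browser session.')
        return page


try:
    GuardedClient().fetch('https://example.com/track/123')
except BlockedError as exc:
    print(exc)
```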

View File

@@ -2139,10 +2139,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(InfoExtractor):
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
@@ -2157,32 +2158,8 @@ class YoutubeSearchURLIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
webpage = self._download_webpage(url, query)
return self.playlist_result(self._process_page(webpage), playlist_title=query)
result_code = self._search_regex(
r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
part_codes = re.findall(
r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
entries = []
for part_code in part_codes:
part_title = self._html_search_regex(
[r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
part_url_snippet = self._html_search_regex(
r'(?s)href="([^"]+)"', part_code, 'item URL')
part_url = compat_urlparse.urljoin(
'https://www.youtube.com/', part_url_snippet)
entries.append({
'_type': 'url',
'url': part_url,
'title': part_title,
})
return {
'_type': 'playlist',
'entries': entries,
'title': query,
}
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
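With the search-URL extractor now delegating to the shared playlist base class, the interesting piece is `_VIDEO_RE`, which pulls the 11-character video id (and, when present, the title) out of `/watch?v=...` links on the results page. A small self-contained check of that regex against a made-up fragment of result markup:

```python
import re

_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'

# Made-up markup loosely mimicking a result page; real pages are more complex.
html = (
    '<h3 class="yt-lockup-title">'
    '<a href="/watch?v=BaW_jenozKc" class="yt-uix-tile-link" '
    'title="youtube-dl test video">youtube-dl test video</a></h3>'
)

for m in re.finditer(_VIDEO_RE, html):
    print(m.group('id'), m.group('title'))
# BaW_jenozKc youtube-dl test video
```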