commit ca44adda06
Gilles Habran, 2016-04-01 16:09:09 +02:00
19 changed files with 392 additions and 215 deletions

.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.03.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.03.27**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.01**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.03.27
+[debug] youtube-dl version 2016.04.01
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

docs/supportedsites.md

@@ -118,6 +118,7 @@
 - **Clubic**
 - **Clyp**
 - **cmt.com**
+- **CNBC**
 - **CNET**
 - **CNN**
 - **CNNArticle**
@@ -134,6 +135,7 @@
 - **CrooksAndLiars**
 - **Crunchyroll**
 - **crunchyroll:playlist**
+- **CSNNE**
 - **CSpan**: C-SPAN
 - **CtsNews**: 華視新聞
 - **culturebox.francetvinfo.fr**
@@ -376,7 +378,8 @@
 - **myvideo** (Currently broken)
 - **MyVidster**
 - **n-tv.de**
-- **NationalGeographic**
+- **natgeo**
+- **natgeo:channel**
 - **Naver**
 - **NBA**
 - **NBC**
@@ -618,7 +621,6 @@
 - **Telegraaf**
 - **TeleMB**
 - **TeleTask**
-- **TenPlay**
 - **TF1**
 - **TheIntercept**
 - **TheOnion**
@@ -740,6 +742,7 @@
 - **vlive**
 - **Vodlocker**
 - **VoiceRepublic**
+- **VoxMedia**
 - **Vporn**
 - **vpro**: npo.nl and ntr.nl
 - **VRT**

youtube_dl/extractor/__init__.py

@@ -438,7 +438,10 @@ from .myspass import MySpassIE
 from .myvi import MyviIE
 from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
-from .nationalgeographic import NationalGeographicIE
+from .nationalgeographic import (
+    NationalGeographicIE,
+    NationalGeographicChannelIE,
+)
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -737,7 +740,6 @@ from .telecinco import TelecincoIE
 from .telegraaf import TelegraafIE
 from .telemb import TeleMBIE
 from .teletask import TeleTaskIE
-from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .theintercept import TheInterceptIE
@@ -902,6 +904,7 @@ from .vk import (
 from .vlive import VLiveIE
 from .vodlocker import VodlockerIE
 from .voicerepublic import VoiceRepublicIE
+from .voxmedia import VoxMediaIE
 from .vporn import VpornIE
 from .vrt import VRTIE
 from .vube import VubeIE

youtube_dl/extractor/aenetworks.py

@@ -1,13 +1,19 @@
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
-from ..utils import smuggle_url
+from ..utils import (
+    smuggle_url,
+    update_url_query,
+    unescapeHTML,
+)


 class AENetworksIE(InfoExtractor):
     IE_NAME = 'aenetworks'
     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'

     _TESTS = [{
         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
@@ -25,16 +31,13 @@ class AENetworksIE(InfoExtractor):
         'expected_warnings': ['JSON-LD'],
     }, {
         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
-        'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
         'info_dict': {
             'id': 'eg47EERs_JsZ',
             'ext': 'mp4',
             'title': 'Winter Is Coming',
             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
         'add_ie': ['ThePlatform'],
     }, {
         'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
@@ -48,7 +51,7 @@ class AENetworksIE(InfoExtractor):
     }]

     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        page_type, video_id = re.match(self._VALID_URL, url).groups()

         webpage = self._download_webpage(url, video_id)
@@ -56,11 +59,22 @@ class AENetworksIE(InfoExtractor):
             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
             r"media_url\s*=\s*'([^']+)'"
         ]
-        video_url = self._search_regex(video_url_re, webpage, 'video url')
+        video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
+        query = {'mbr': 'true'}
+        if page_type == 'shows':
+            query['assetTypes'] = 'medium_video_s3'
+            if 'switch=hds' in video_url:
+                query['switch'] = 'hls'

         info = self._search_json_ld(webpage, video_id, fatal=False)
         info.update({
             '_type': 'url_transparent',
-            'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
+            'url': smuggle_url(update_url_query(
+                video_url, query), {
+                    'sig': {
+                        'key': 'crazyjava',
+                        'secret': 's3cr3t'},
+                    'force_smil_url': True
+                }),
         })
         return info

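A quick illustration of the `update_url_query` call above, with placeholder URL and values that are not from the commit: an existing `switch=hds` parameter is overridden rather than duplicated, which is why the extractor can simply set `switch` to `hls`.

```python
from youtube_dl.utils import update_url_query

# Placeholder release URL; real ones are scraped from the page markup.
url = 'http://link.theplatform.com/s/xxx/yyy?switch=hds'
print(update_url_query(url, {'mbr': 'true', 'switch': 'hls'}))
# e.g. http://link.theplatform.com/s/xxx/yyy?switch=hls&mbr=true (parameter order may vary)
```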
youtube_dl/extractor/beeg.py

@@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):
         video_id = self._match_id(url)

         video = self._download_json(
-            'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
+            'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id)

         def split(o, e):
             def cut(s, x):
@@ -50,8 +50,8 @@ class BeegIE(InfoExtractor):
                 return n

         def decrypt_key(key):
-            # Reverse engineered from http://static.beeg.com/cpl/1105.js
-            a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
+            # Reverse engineered from http://static.beeg.com/cpl/1738.js
+            a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B'
             e = compat_urllib_parse_unquote(key)
             o = ''.join([
                 compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)

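For context, the character-shift step of `decrypt_key` can be sketched standalone as below; Python 3 built-ins stand in for the compat shims, and the chunk reshuffling done by the surrounding `split`/`cut` helpers falls outside this hunk, so it is omitted.

```python
from urllib.parse import unquote  # stands in for compat_urllib_parse_unquote

def shift_chars(key, a='GUuyodcfS8FW8gQp4OKLMsZBcX0T7B'):
    e = unquote(key)
    # Note the precedence: only ord(a[...]) is reduced modulo 21.
    return ''.join(
        chr(ord(e[n]) - ord(a[n % len(a)]) % 21)
        for n in range(len(e)))
```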
youtube_dl/extractor/brightcove.py

@@ -439,7 +439,7 @@ class BrightcoveNewIE(InfoExtractor):
                 </video>.*?
                 <script[^>]+
                     src=["\'](?:https?:)?//players\.brightcove\.net/
-                    (\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js
+                    (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
             ''', webpage):
         entries.append(
             'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'

youtube_dl/extractor/cbs.py

@@ -1,21 +1,35 @@
 from __future__ import unicode_literals

-from .common import InfoExtractor
+from .theplatform import ThePlatformIE
 from ..utils import (
-    sanitized_Request,
-    smuggle_url,
+    xpath_text,
+    xpath_element,
+    int_or_none,
+    ExtractorError,
+    find_xpath_attr,
 )


-class CBSIE(InfoExtractor):
+class CBSBaseIE(ThePlatformIE):
+    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+        closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
+        return {
+            'en': [{
+                'ext': 'ttml',
+                'url': closed_caption_e.attrib['value'],
+            }]
+        } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
+
+
+class CBSIE(CBSBaseIE):
     _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'

     _TESTS = [{
         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
         'info_dict': {
-            'id': '4JUVEwq3wUT7',
+            'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
             'display_id': 'connect-chat-feat-garth-brooks',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Connect Chat feat. Garth Brooks',
             'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
             'duration': 1495,
@@ -47,22 +61,46 @@ class CBSIE(InfoExtractor):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }]
+    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'

     def _real_extract(self, url):
         display_id = self._match_id(url)
-        request = sanitized_Request(url)
-        # Android UA is served with higher quality (720p) streams (see
-        # https://github.com/rg3/youtube-dl/issues/7490)
-        request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
-        webpage = self._download_webpage(request, display_id)
-        real_id = self._search_regex(
-            [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
-            webpage, 'real video ID')
-        return {
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(
-                'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
-                {'force_smil_url': True}),
+        webpage = self._download_webpage(url, display_id)
+        content_id = self._search_regex(
+            [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
+            webpage, 'content id')
+        items_data = self._download_xml(
+            'http://can.cbs.com/thunder/player/videoPlayerService.php',
+            content_id, query={'partner': 'cbs', 'contentId': content_id})
+        video_data = xpath_element(items_data, './/item')
+        title = xpath_text(video_data, 'videoTitle', 'title', True)
+
+        subtitles = {}
+        formats = []
+        for item in items_data.findall('.//item'):
+            pid = xpath_text(item, 'pid')
+            if not pid:
+                continue
+            try:
+                tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                    self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
+            except ExtractorError:
+                continue
+            formats.extend(tp_formats)
+            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        self._sort_formats(formats)
+
+        info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
+        info.update({
+            'id': content_id,
             'display_id': display_id,
-        }
+            'title': title,
+            'series': xpath_text(video_data, 'seriesTitle'),
+            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
+            'thumbnail': xpath_text(video_data, 'previewImageURL'),
+            'formats': formats,
+            'subtitles': subtitles,
+        })
+        return info

youtube_dl/extractor/cbsnews.py

@@ -2,14 +2,14 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from .theplatform import ThePlatformIE
+from .cbs import CBSBaseIE
 from ..utils import (
     parse_duration,
     find_xpath_attr,
 )


-class CBSNewsIE(ThePlatformIE):
+class CBSNewsIE(CBSBaseIE):
     IE_DESC = 'CBS News'
     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
@@ -49,15 +49,6 @@ class CBSNewsIE(ThePlatformIE):
         },
     ]

-    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
-        closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
-        return {
-            'en': [{
-                'ext': 'ttml',
-                'url': closed_caption_e.attrib['value'],
-            }]
-        } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
-
     def _real_extract(self, url):
         video_id = self._match_id(url)

youtube_dl/extractor/common.py

@@ -22,6 +22,7 @@ from ..compat import (
     compat_str,
     compat_urllib_error,
     compat_urllib_parse_urlencode,
+    compat_urllib_request,
     compat_urlparse,
 )
 from ..downloader.f4m import remove_encrypted_media
@@ -49,6 +50,7 @@ from ..utils import (
     determine_protocol,
     parse_duration,
     mimetype2ext,
+    update_Request,
     update_url_query,
 )
@@ -347,7 +349,7 @@ class InfoExtractor(object):
     def IE_NAME(self):
         return compat_str(type(self).__name__[:-2])

-    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
+    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         """ Returns the response handle """
         if note is None:
             self.report_download_webpage(video_id)
@@ -357,11 +359,14 @@ class InfoExtractor(object):
         else:
             self.to_screen('%s: %s' % (video_id, note))
-        # data, headers and query params will be ignored for `Request` objects
-        if isinstance(url_or_request, compat_str):
+        if isinstance(url_or_request, compat_urllib_request.Request):
+            url_or_request = update_Request(
+                url_or_request, data=data, headers=headers, query=query)
+        else:
             if query:
                 url_or_request = update_url_query(url_or_request, query)
             if data or headers:
-                url_or_request = sanitized_Request(url_or_request, data, headers or {})
+                url_or_request = sanitized_Request(url_or_request, data, headers)
         try:
             return self._downloader.urlopen(url_or_request)
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -377,7 +382,7 @@ class InfoExtractor(object):
             self._downloader.report_warning(errmsg)
             return False

-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
@@ -470,7 +475,7 @@ class InfoExtractor(object):
         return content

-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
@@ -491,7 +496,7 @@ class InfoExtractor(object):
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
+                      transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
             url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
@@ -505,7 +510,7 @@ class InfoExtractor(object):
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True, encoding=None, data=None, headers=None, query=None):
+                       fatal=True, encoding=None, data=None, headers={}, query={}):
         json_string = self._download_webpage(
             url_or_request, video_id, note, errnote, fatal=fatal,
             encoding=encoding, data=data, headers=headers, query=query)

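The practical effect: `data`, `headers` and `query` are no longer silently ignored when an extractor passes a prebuilt `Request`; they are merged in via the new `update_Request` helper. A hypothetical call site inside an extractor's `_real_extract` (endpoint and parameter names invented):

```python
video = self._download_json(
    'https://api.example.com/v1/video', video_id,
    query={'id': video_id},                          # appended to the URL
    headers={'X-Requested-With': 'XMLHttpRequest'})  # merged into the request
```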
youtube_dl/extractor/generic.py

@@ -406,19 +406,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # multiple ooyala embeds on SBN network websites
-        {
-            'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
-            'info_dict': {
-                'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
-                'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
-            },
-            'playlist_mincount': 3,
-            'params': {
-                'skip_download': True,
-            },
-            'add_ie': ['Ooyala'],
-        },
         # embed.ly video
         {
             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',

youtube_dl/extractor/howstuffworks.py

@@ -6,6 +6,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     unescapeHTML,
+    determine_ext,
 )
@@ -39,7 +40,7 @@ class HowStuffWorksIE(InfoExtractor):
         'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
         'info_dict': {
             'id': '440011',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Sword Swallowing #1 by Dan Meyer',
             'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
             'display_id': 'sword-swallowing-1-by-dan-meyer',
@@ -63,13 +64,19 @@ class HowStuffWorksIE(InfoExtractor):
         video_id = clip_info['content_id']
         formats = []
         m3u8_url = clip_info.get('m3u8')
-        if m3u8_url:
-            formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
+            formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))
+        flv_url = clip_info.get('flv_url')
+        if flv_url:
+            formats.append({
+                'url': flv_url,
+                'format_id': 'flv',
+            })
         for video in clip_info.get('mp4', []):
             formats.append({
                 'url': video['src'],
-                'format_id': video['bitrate'],
-                'vbr': int(video['bitrate'].rstrip('k')),
+                'format_id': 'mp4-%s' % video['bitrate'],
+                'vbr': int_or_none(video['bitrate'].rstrip('k')),
             })

         if not formats:
@@ -102,6 +109,6 @@ class HowStuffWorksIE(InfoExtractor):
             'title': unescapeHTML(clip_info['clip_title']),
             'description': unescapeHTML(clip_info.get('caption')),
             'thumbnail': clip_info.get('video_still_url'),
-            'duration': clip_info.get('duration'),
+            'duration': int_or_none(clip_info.get('duration')),
             'formats': formats,
         }

youtube_dl/extractor/nationalgeographic.py

@@ -4,18 +4,21 @@ from .common import InfoExtractor
 from ..utils import (
     smuggle_url,
     url_basename,
+    update_url_query,
 )


 class NationalGeographicIE(InfoExtractor):
+    IE_NAME = 'natgeo'
     _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'

     _TESTS = [
         {
             'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+            'md5': '730855d559abbad6b42c2be1fa584917',
             'info_dict': {
-                'id': '4DmDACA6Qtk_',
-                'ext': 'flv',
+                'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
+                'ext': 'mp4',
                 'title': 'Mating Crabs Busted by Sharks',
                 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
             },
@@ -23,9 +26,10 @@ class NationalGeographicIE(InfoExtractor):
         },
         {
             'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
+            'md5': '6a3105eb448c070503b3105fb9b320b5',
             'info_dict': {
-                'id': '_JeBD_D7PlS5',
-                'ext': 'flv',
+                'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
+                'ext': 'mp4',
                 'title': 'The Real Jaws',
                 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
             },
@@ -37,18 +41,61 @@ class NationalGeographicIE(InfoExtractor):
         name = url_basename(url)

         webpage = self._download_webpage(url, name)
-        feed_url = self._search_regex(
-            r'data-feed-url="([^"]+)"', webpage, 'feed url')
         guid = self._search_regex(
             r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
             webpage, 'guid')

-        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
-        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
-        theplatform_id = url_basename(content.attrib.get('url'))
-
-        return self.url_result(smuggle_url(
-            'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
-            # For some reason, the normal links don't work and we must force
-            # the use of f4m
-            {'force_smil_url': True}))
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid,
+                {'force_smil_url': True}),
+            'id': guid,
+        }
+
+
+class NationalGeographicChannelIE(InfoExtractor):
+    IE_NAME = 'natgeo:channel'
+    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
+            'md5': '518c9aa655686cf81493af5cc21e2a04',
+            'info_dict': {
+                'id': 'nB5vIAfmyllm',
+                'ext': 'mp4',
+                'title': 'Uncovering a Universal Knowledge',
+                'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+        {
+            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
+            'md5': 'c4912f656b4cbe58f3e000c489360989',
+            'info_dict': {
+                'id': '3TmMv9OvGwIR',
+                'ext': 'mp4',
+                'title': 'The Stunning Red Bird of Paradise',
+                'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
+            },
+            'add_ie': ['ThePlatform'],
+        },
+    ]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        release_url = self._search_regex(
+            r'video_auth_playlist_url\s*=\s*"([^"]+)"',
+            webpage, 'release url')
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': smuggle_url(
+                update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}),
+                {'force_smil_url': True}),
+            'display_id': display_id,
+        }

youtube_dl/extractor/tenplay.py (deleted)

@@ -1,90 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    float_or_none,
-)
-
-
-class TenPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
-    _TEST = {
-        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
-        'info_dict': {
-            'id': '2695695426001',
-            'ext': 'flv',
-            'title': 'TENplay: TV your way',
-            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
-            'timestamp': 1380150606.889,
-            'upload_date': '20130925',
-            'uploader': 'TENplay',
-        },
-        'params': {
-            'skip_download': True,  # Requires rtmpdump
-        }
-    }
-
-    _video_fields = [
-        'id', 'name', 'shortDescription', 'longDescription', 'creationDate',
-        'publishedDate', 'lastModifiedDate', 'customFields', 'videoStillURL',
-        'thumbnailURL', 'referenceId', 'length', 'playsTotal',
-        'playsTrailingWeek', 'renditions', 'captioning', 'startDate', 'endDate']
-
-    def _real_extract(self, url):
-        webpage = self._download_webpage(url, url)
-        video_id = self._html_search_regex(
-            r'videoID: "(\d+?)"', webpage, 'video_id')
-        api_token = self._html_search_regex(
-            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
-        title = self._html_search_regex(
-            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
-            webpage, 'title')
-
-        json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title)
-        formats = []
-        for rendition in json['renditions']:
-            url = rendition['remoteUrl'] or rendition['url']
-            protocol = 'rtmp' if url.startswith('rtmp') else 'http'
-            ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower()
-
-            if protocol == 'rtmp':
-                url = url.replace('&mp4:', '')
-
-            tbr = int_or_none(rendition.get('encodingRate'), 1000)
-
-            formats.append({
-                'format_id': '_'.join(
-                    ['rtmp', rendition['videoContainer'].lower(),
-                     rendition['videoCodec'].lower(), '%sk' % tbr]),
-                'width': int_or_none(rendition['frameWidth']),
-                'height': int_or_none(rendition['frameHeight']),
-                'tbr': tbr,
-                'filesize': int_or_none(rendition['size']),
-                'protocol': protocol,
-                'ext': ext,
-                'vcodec': rendition['videoCodec'].lower(),
-                'container': rendition['videoContainer'].lower(),
-                'url': url,
-            })
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'display_id': json['referenceId'],
-            'title': json['name'],
-            'description': json['shortDescription'] or json['longDescription'],
-            'formats': formats,
-            'thumbnails': [{
-                'url': json['videoStillURL']
-            }, {
-                'url': json['thumbnailURL']
-            }],
-            'thumbnail': json['videoStillURL'],
-            'duration': float_or_none(json.get('length'), 1000),
-            'timestamp': float_or_none(json.get('creationDate'), 1000),
-            'uploader': json.get('customFields', {}).get('production_company_distributor') or 'TENplay',
-            'view_count': int_or_none(json.get('playsTotal')),
-        }

youtube_dl/extractor/theplatform.py

@@ -154,7 +154,7 @@ class ThePlatformIE(ThePlatformBaseIE):
         def hex_to_str(hex):
             return binascii.a2b_hex(hex)

-        relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
+        relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
         clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
         checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
         sig = flags + expiration_date + checksum + str_to_hex(sig_secret)

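The regex form also fixes a latent bug: the old `str.split('http://link.theplatform.com/s/')` raised `IndexError` for `https://` links, which the new pattern accepts. A quick check with illustrative URLs:

```python
import re

for url in ('http://link.theplatform.com/s/dJ5BDC/abcDEF?mbr=true',
            'https://link.theplatform.com/s/dJ5BDC/abcDEF'):
    print(re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1))
# both print: dJ5BDC/abcDEF
```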
youtube_dl/extractor/tudou.py

@@ -5,6 +5,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    ExtractorError,
     int_or_none,
     InAdvancePagedList,
     float_or_none,
@@ -46,6 +47,19 @@ class TudouIE(InfoExtractor):
     _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'

+    # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
+    # 0001, 0002 and 4001 are not included as they indicate temporary issues
+    TVC_ERRORS = {
+        '0003': 'The video is deleted or does not exist',
+        '1001': 'This video is unavailable due to licensing issues',
+        '1002': 'This video is unavailable as it\'s under review',
+        '1003': 'This video is unavailable as it\'s under review',
+        '3001': 'Password required',
+        '5001': 'This video is available in Mainland China only due to licensing issues',
+        '7001': 'This video is unavailable',
+        '8001': 'This video is unavailable due to licensing issues',
+    }
+
     def _url_for_id(self, video_id, quality=None):
         info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
         if quality:
@@ -63,6 +77,15 @@ class TudouIE(InfoExtractor):
         if youku_vcode:
             return self.url_result('youku:' + youku_vcode, ie='Youku')

+        if not item_data.get('itemSegs'):
+            tvc_code = item_data.get('tvcCode')
+            if tvc_code:
+                err_msg = self.TVC_ERRORS.get(tvc_code)
+                if err_msg:
+                    raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
+                raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
+            raise ExtractorError('Unxpected error returned from Tudou')
+
         title = unescapeHTML(item_data['kw'])
         description = item_data.get('desc')
         thumbnail_url = item_data.get('pic')

youtube_dl/extractor/udemy.py

@@ -54,6 +54,16 @@ class UdemyIE(InfoExtractor):
         'only_matching': True,
     }]

+    def _extract_course_info(self, webpage, video_id):
+        course = self._parse_json(
+            unescapeHTML(self._search_regex(
+                r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
+            video_id, fatal=False) or {}
+        course_id = course.get('id') or self._search_regex(
+            (r'&quot;id&quot;\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
+            webpage, 'course id')
+        return course_id, course.get('title')
+
     def _enroll_course(self, base_url, webpage, course_id):
         def combine_url(base_url, url):
             return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
@@ -98,7 +108,7 @@ class UdemyIE(InfoExtractor):
             error_str += ' - %s' % error_data.get('formErrors')
             raise ExtractorError(error_str, expected=True)

-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+    def _download_json(self, url_or_request, *args, **kwargs):
         headers = {
             'X-Udemy-Snail-Case': 'true',
             'X-Requested-With': 'XMLHttpRequest',
@@ -116,7 +126,7 @@ class UdemyIE(InfoExtractor):
         else:
             url_or_request = sanitized_Request(url_or_request, headers=headers)

-        response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
+        response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
         self._handle_error(response)
         return response
@@ -166,9 +176,7 @@ class UdemyIE(InfoExtractor):
         webpage = self._download_webpage(url, lecture_id)

-        course_id = self._search_regex(
-            (r'data-course-id=["\'](\d+)', r'&quot;id&quot;\s*:\s*(\d+)'),
-            webpage, 'course id')
+        course_id, _ = self._extract_course_info(webpage, lecture_id)

         try:
             lecture = self._download_lecture(course_id, lecture_id)
@@ -297,7 +305,7 @@ class UdemyIE(InfoExtractor):
 class UdemyCourseIE(UdemyIE):
     IE_NAME = 'udemy:course'
-    _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[\da-z-]+)'
+    _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)'
     _TESTS = []

     @classmethod
@@ -309,29 +317,29 @@ class UdemyCourseIE(UdemyIE):
         webpage = self._download_webpage(url, course_path)

-        response = self._download_json(
-            'https://www.udemy.com/api-1.1/courses/%s' % course_path,
-            course_path, 'Downloading course JSON')
-
-        course_id = response['id']
-        course_title = response.get('title')
+        course_id, title = self._extract_course_info(webpage, course_path)

         self._enroll_course(url, webpage, course_id)

         response = self._download_json(
-            'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
-            course_id, 'Downloading course curriculum')
+            'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
+            course_id, 'Downloading course curriculum', query={
+                'fields[chapter]': 'title,object_index',
+                'fields[lecture]': 'title',
+                'page_size': '1000',
+            })

         entries = []
-        chapter, chapter_number = None, None
-        for asset in response:
-            asset_type = asset.get('assetType') or asset.get('asset_type')
-            if asset_type == 'Video':
-                asset_id = asset.get('id')
-                if asset_id:
+        chapter, chapter_number = [None] * 2
+        for entry in response['results']:
+            clazz = entry.get('_class')
+            if clazz == 'lecture':
+                lecture_id = entry.get('id')
+                if lecture_id:
                     entry = {
                         '_type': 'url_transparent',
-                        'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
+                        'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
+                        'title': entry.get('title'),
                         'ie_key': UdemyIE.ie_key(),
                     }
                     if chapter_number:
@@ -339,8 +347,8 @@ class UdemyCourseIE(UdemyIE):
                     if chapter:
                         entry['chapter'] = chapter
                     entries.append(entry)
-            elif asset.get('type') == 'chapter':
-                chapter_number = asset.get('index') or asset.get('object_index')
-                chapter = asset.get('title')
+            elif clazz == 'chapter':
+                chapter_number = entry.get('object_index')
+                chapter = entry.get('title')

-        return self.playlist_result(entries, course_id, course_title)
+        return self.playlist_result(entries, course_id, title)

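For orientation, the rewritten curriculum loop assumes the api-2.0 endpoint returns a flat `results` list mixing chapter and lecture objects, roughly like this illustrative payload (field values invented):

```python
response = {
    'results': [
        {'_class': 'chapter', 'object_index': 1, 'title': 'Getting Started'},
        {'_class': 'lecture', 'id': 12345, 'title': 'Welcome'},
        {'_class': 'lecture', 'id': 12346, 'title': 'Installing the tools'},
    ],
}
```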
youtube_dl/extractor/voxmedia.py (new)

@@ -0,0 +1,132 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class VoxMediaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
+    _TESTS = [{
+        'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
+        'md5': '73856edf3e89a711e70d5cf7cb280b37',
+        'info_dict': {
+            'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
+            'ext': 'mp4',
+            'title': 'Google\'s new material design direction',
+            'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
+        }
+    }, {
+        # data-ooyala-id
+        'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
+        'md5': 'd744484ff127884cd2ba09e3fa604e4b',
+        'info_dict': {
+            'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B',
+            'ext': 'mp4',
+            'title': 'The Nexus 6: hands-on with Google\'s phablet',
+            'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
+        }
+    }, {
+        # volume embed
+        'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
+        'md5': '375c483c5080ab8cd85c9c84cfc2d1e4',
+        'info_dict': {
+            'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
+            'ext': 'mp4',
+            'title': 'The new frontier of LGBTQ civil rights, explained',
+            'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
+        }
+    }, {
+        # youtube embed
+        'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
+        'md5': '83b3080489fb103941e549352d3e0977',
+        'info_dict': {
+            'id': 'FcNHTJU1ufM',
+            'ext': 'mp4',
+            'title': 'How "the robot" became the greatest novelty dance of all time',
+            'description': 'md5:b081c0d588b8b2085870cda55e6da176',
+            'upload_date': '20160324',
+            'uploader_id': 'voxdotcom',
+            'uploader': 'Vox',
+        }
+    }, {
+        # SBN.VideoLinkset.entryGroup multiple ooyala embeds
+        'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+        'info_dict': {
+            'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+            'title': '25 lies you will tell yourself on National Signing Day',
+            'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!',
+        },
+        'playlist': [{
+            'md5': '721fededf2ab74ae4176c8c8cbfe092e',
+            'info_dict': {
+                'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9',
+                'ext': 'mp4',
+                'title': 'Buddy Hield vs Steph Curry (and the world)',
+                'description': 'Lets dissect only the most important Final Four storylines.',
+            },
+        }, {
+            'md5': 'bf0c5cc115636af028be1bab79217ea9',
+            'info_dict': {
+                'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj',
+                'ext': 'mp4',
+                'title': 'Chasing Cinderella 2016: Syracuse basketball',
+                'description': 'md5:e02d56b026d51aa32c010676765a690d',
+            },
+        }],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id))
+
+        def create_entry(provider_video_id, provider_video_type, title=None, description=None):
+            return {
+                '_type': 'url_transparent',
+                'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id),
+                'title': title or self._og_search_title(webpage),
+                'description': description or self._og_search_description(webpage),
+            }
+
+        entries = []
+        entries_data = self._search_regex([
+            r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);',
+            r'var\s+entry\s*=\s*({.+});',
+            r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])',
+        ], webpage, 'video data', default=None)
+        if entries_data:
+            entries_data = self._parse_json(entries_data, display_id)
+            if isinstance(entries_data, dict):
+                entries_data = [entries_data]
+            for video_data in entries_data:
+                provider_video_id = video_data.get('provider_video_id')
+                provider_video_type = video_data.get('provider_video_type')
+                if provider_video_id and provider_video_type:
+                    entries.append(create_entry(
+                        provider_video_id, provider_video_type,
+                        video_data.get('title'), video_data.get('description')))
+
+        provider_video_id = self._search_regex(
+            r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None)
+        if provider_video_id:
+            entries.append(create_entry(provider_video_id, 'ooyala'))
+
+        volume_uuid = self._search_regex(
+            r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None)
+        if volume_uuid:
+            volume_webpage = self._download_webpage(
+                'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
+            video_data = self._parse_json(self._search_regex(
+                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
+            for provider_video_type in ('ooyala', 'youtube'):
+                provider_video_id = video_data.get('%s_id' % provider_video_type)
+                if provider_video_id:
+                    description = video_data.get('description_long') or video_data.get('description_short')
+                    entries.append(create_entry(
+                        provider_video_id, provider_video_type, video_data.get('title_short'), description))
+                    break
+
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries, display_id, self._og_search_title(webpage), self._og_search_description(webpage))

youtube_dl/utils.py

@@ -778,12 +778,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         # Substitute URL if any change after escaping
         if url != url_escaped:
-            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
-            new_req = req_type(
-                url_escaped, data=req.data, headers=req.headers,
-                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-            new_req.timeout = req.timeout
-            req = new_req
+            req = update_Request(req, url=url_escaped)

         for h, v in std_headers.items():
             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
@@ -1804,6 +1799,20 @@ def update_url_query(url, query):
         query=compat_urllib_parse_urlencode(qs, True)))


+def update_Request(req, url=None, data=None, headers={}, query={}):
+    req_headers = req.headers.copy()
+    req_headers.update(headers)
+    req_data = data or req.data
+    req_url = update_url_query(url or req.get_full_url(), query)
+    req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+    new_req = req_type(
+        req_url, data=req_data, headers=req_headers,
+        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+    if hasattr(req, 'timeout'):
+        new_req.timeout = req.timeout
+    return new_req
+
+
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     if isinstance(key_or_keys, (list, tuple)):
         for key in key_or_keys:

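Usage sketch for the new helper, with Python 3's `urllib.request` standing in for the compat module (header-key casing normalization by `Request` is glossed over):

```python
import urllib.request

from youtube_dl.utils import update_Request

req = urllib.request.Request(
    'http://example.com/api', headers={'Accept': 'application/json'})
req = update_Request(req, query={'page': '2'}, headers={'X-Token': 'abc'})
print(req.get_full_url())  # http://example.com/api?page=2
```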
youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.03.27'
+__version__ = '2016.04.01'