Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
ca44adda06
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.03.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.03.27**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.01**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.03.27
|
||||
[debug] youtube-dl version 2016.04.01
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -118,6 +118,7 @@
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNBC**
|
||||
- **CNET**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
@ -134,6 +135,7 @@
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **culturebox.francetvinfo.fr**
|
||||
@ -376,7 +378,8 @@
|
||||
- **myvideo** (Currently broken)
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **natgeo**
|
||||
- **natgeo:channel**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@ -618,7 +621,6 @@
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
- **TF1**
|
||||
- **TheIntercept**
|
||||
- **TheOnion**
|
||||
@ -740,6 +742,7 @@
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **VoxMedia**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl and ntr.nl
|
||||
- **VRT**
|
||||
|
@ -438,7 +438,10 @@ from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicIE,
|
||||
NationalGeographicChannelIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@ -737,7 +740,6 @@ from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theintercept import TheInterceptIE
|
||||
@ -902,6 +904,7 @@ from .vk import (
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
|
@ -1,13 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksIE(InfoExtractor):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
@ -25,16 +31,13 @@ class AENetworksIE(InfoExtractor):
|
||||
'expected_warnings': ['JSON-LD'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'info_dict': {
|
||||
'id': 'eg47EERs_JsZ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
|
||||
@ -48,7 +51,7 @@ class AENetworksIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@ -56,11 +59,22 @@ class AENetworksIE(InfoExtractor):
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
r"media_url\s*=\s*'([^']+)'"
|
||||
]
|
||||
video_url = self._search_regex(video_url_re, webpage, 'video url')
|
||||
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
|
||||
query = {'mbr': 'true'}
|
||||
if page_type == 'shows':
|
||||
query['assetTypes'] = 'medium_video_s3'
|
||||
if 'switch=hds' in video_url:
|
||||
query['switch'] = 'hls'
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
|
||||
'url': smuggle_url(update_url_query(
|
||||
video_url, query), {
|
||||
'sig': {
|
||||
'key': 'crazyjava',
|
||||
'secret': 's3cr3t'},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
})
|
||||
return info
|
||||
|
@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
|
||||
'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id)
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
@ -50,8 +50,8 @@ class BeegIE(InfoExtractor):
|
||||
return n
|
||||
|
||||
def decrypt_key(key):
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1105.js
|
||||
a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
||||
a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B'
|
||||
e = compat_urllib_parse_unquote(key)
|
||||
o = ''.join([
|
||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
||||
|
@ -439,7 +439,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
''', webpage):
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
|
@ -1,21 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSIE(InfoExtractor):
|
||||
class CBSBaseIE(ThePlatformIE):
    """Shared base for the CBS extractors that build on ThePlatformIE."""

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
        """Build a subtitles dict from the SMIL ``ClosedCaptionURL`` param.

        Looks up the ``<param name="ClosedCaptionURL">`` element in ``smil``
        (resolved against ``namespace``) and, when it carries a non-empty
        ``value`` attribute, returns a single TTML entry filed under
        ``subtitles_lang``.

        Returns an empty dict when the param is missing or has no value, so
        the result is always a mapping of language code to a list of
        subtitle entries.
        """
        closed_caption_e = find_xpath_attr(
            smil, self._xpath_ns('.//param', namespace),
            'name', 'ClosedCaptionURL')
        if closed_caption_e is None:
            return {}

        caption_url = closed_caption_e.attrib.get('value')
        if not caption_url:
            return {}

        return {
            subtitles_lang: [{
                'ext': 'ttml',
                'url': caption_url,
            }]
        }
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '4JUVEwq3wUT7',
|
||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||
'display_id': 'connect-chat-feat-garth-brooks',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
'duration': 1495,
|
||||
@ -47,22 +61,46 @@ class CBSIE(InfoExtractor):
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
request = sanitized_Request(url)
|
||||
# Android UA is served with higher quality (720p) streams (see
|
||||
# https://github.com/rg3/youtube-dl/issues/7490)
|
||||
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
real_id = self._search_regex(
|
||||
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
|
||||
webpage, 'real video ID')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
|
||||
{'force_smil_url': True}),
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
pid = xpath_text(item, 'pid')
|
||||
if not pid:
|
||||
continue
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
|
||||
except ExtractorError:
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
|
||||
info.update({
|
||||
'id': content_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
@ -2,14 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .cbs import CBSBaseIE
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsIE(ThePlatformIE):
|
||||
class CBSNewsIE(CBSBaseIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
@ -49,15 +49,6 @@ class CBSNewsIE(ThePlatformIE):
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
||||
return {
|
||||
'en': [{
|
||||
'ext': 'ttml',
|
||||
'url': closed_caption_e.attrib['value'],
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
|
@ -22,6 +22,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..downloader.f4m import remove_encrypted_media
|
||||
@ -49,6 +50,7 @@ from ..utils import (
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
mimetype2ext,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -347,7 +349,7 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
""" Returns the response handle """
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
@ -357,11 +359,14 @@ class InfoExtractor(object):
|
||||
else:
|
||||
self.to_screen('%s: %s' % (video_id, note))
|
||||
# data, headers and query params will be ignored for `Request` objects
|
||||
if isinstance(url_or_request, compat_str):
|
||||
if isinstance(url_or_request, compat_urllib_request.Request):
|
||||
url_or_request = update_Request(
|
||||
url_or_request, data=data, headers=headers, query=query)
|
||||
else:
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
if data or headers:
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers or {})
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
@ -377,7 +382,7 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
@ -470,7 +475,7 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns the data of the page as a string """
|
||||
success = False
|
||||
try_count = 0
|
||||
@ -491,7 +496,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
@ -505,7 +510,7 @@ class InfoExtractor(object):
|
||||
note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata',
|
||||
transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers=None, query=None):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
json_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
|
@ -406,19 +406,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# multiple ooyala embeds on SBN network websites
|
||||
{
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'info_dict': {
|
||||
'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||
|
@ -6,6 +6,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@ -39,7 +40,7 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
|
||||
'info_dict': {
|
||||
'id': '440011',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sword Swallowing #1 by Dan Meyer',
|
||||
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
|
||||
'display_id': 'sword-swallowing-1-by-dan-meyer',
|
||||
@ -63,13 +64,19 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
video_id = clip_info['content_id']
|
||||
formats = []
|
||||
m3u8_url = clip_info.get('m3u8')
|
||||
if m3u8_url:
|
||||
formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))
|
||||
flv_url = clip_info.get('flv_url')
|
||||
if flv_url:
|
||||
formats.append({
|
||||
'url': flv_url,
|
||||
'format_id': 'flv',
|
||||
})
|
||||
for video in clip_info.get('mp4', []):
|
||||
formats.append({
|
||||
'url': video['src'],
|
||||
'format_id': video['bitrate'],
|
||||
'vbr': int(video['bitrate'].rstrip('k')),
|
||||
'format_id': 'mp4-%s' % video['bitrate'],
|
||||
'vbr': int_or_none(video['bitrate'].rstrip('k')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
@ -102,6 +109,6 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'title': unescapeHTML(clip_info['clip_title']),
|
||||
'description': unescapeHTML(clip_info.get('caption')),
|
||||
'thumbnail': clip_info.get('video_still_url'),
|
||||
'duration': clip_info.get('duration'),
|
||||
'duration': int_or_none(clip_info.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -4,18 +4,21 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'md5': '730855d559abbad6b42c2be1fa584917',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'id': '0000014b-70a1-dd8c-af7f-f7b559330001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
},
|
||||
@ -23,9 +26,10 @@ class NationalGeographicIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
|
||||
'md5': '6a3105eb448c070503b3105fb9b320b5',
|
||||
'info_dict': {
|
||||
'id': '_JeBD_D7PlS5',
|
||||
'ext': 'flv',
|
||||
'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Real Jaws',
|
||||
'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
|
||||
},
|
||||
@ -37,18 +41,61 @@ class NationalGeographicIE(InfoExtractor):
|
||||
name = url_basename(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
feed_url = self._search_regex(
|
||||
r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(
|
||||
r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
|
||||
webpage, 'guid')
|
||||
|
||||
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
theplatform_id = url_basename(content.attrib.get('url'))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid,
|
||||
{'force_smil_url': True}),
|
||||
'id': guid,
|
||||
}
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force
|
||||
# the use of f4m
|
||||
{'force_smil_url': True}))
|
||||
|
||||
class NationalGeographicChannelIE(InfoExtractor):
    """Extractor for video pages on channel.nationalgeographic.com."""
    IE_NAME = 'natgeo:channel'
    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'

    _TESTS = [
        {
            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
            'md5': '518c9aa655686cf81493af5cc21e2a04',
            'info_dict': {
                'id': 'nB5vIAfmyllm',
                'ext': 'mp4',
                'title': 'Uncovering a Universal Knowledge',
                'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
            },
            'add_ie': ['ThePlatform'],
        },
        {
            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
            'md5': 'c4912f656b4cbe58f3e000c489360989',
            'info_dict': {
                'id': '3TmMv9OvGwIR',
                'ext': 'mp4',
                'title': 'The Stunning Red Bird of Paradise',
                'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
            },
            'add_ie': ['ThePlatform'],
        },
    ]

    def _real_extract(self, url):
        """Hand the page's release URL over to the ThePlatform extractor."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The release URL is read from the ``video_auth_playlist_url``
        # assignment embedded in the page source.
        playlist_url = self._search_regex(
            r'video_auth_playlist_url\s*=\s*"([^"]+)"',
            page, 'release url')

        # Add the mbr/switch query parameters, then smuggle the SMIL flag
        # along with the URL for the ThePlatform extractor.
        release_url = update_url_query(
            playlist_url, {'mbr': 'true', 'switch': 'http'})
        smuggled_url = smuggle_url(release_url, {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggled_url,
            'display_id': display_id,
        }
|
||||
|
@ -1,90 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TenPlayIE(InfoExtractor):
    """Extractor for ten.com.au / tenplay.com.au pages.

    The web page only supplies a video id and an API token; metadata and
    renditions are fetched from the Brightcove library API.
    """
    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
    _TEST = {
        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
        'info_dict': {
            'id': '2695695426001',
            'ext': 'flv',
            'title': 'TENplay: TV your way',
            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
            'timestamp': 1380150606.889,
            'upload_date': '20130925',
            'uploader': 'TENplay',
        },
        'params': {
            'skip_download': True,  # Requires rtmpdump
        }
    }

    # Field names sent to the API in the ``video_fields`` query parameter.
    _video_fields = [
        'id', 'name', 'shortDescription', 'longDescription', 'creationDate',
        'publishedDate', 'lastModifiedDate', 'customFields', 'videoStillURL',
        'thumbnailURL', 'referenceId', 'length', 'playsTotal',
        'playsTrailingWeek', 'renditions', 'captioning', 'startDate', 'endDate']

    def _real_extract(self, url):
        """Scrape id/token from the page, then build formats from the API."""
        # No video id is known before the page is fetched, so the URL itself
        # is passed as the identifier argument.
        webpage = self._download_webpage(url, url)
        video_id = self._html_search_regex(
            r'videoID: "(\d+?)"', webpage, 'video_id')
        api_token = self._html_search_regex(
            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
        title = self._html_search_regex(
            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
            webpage, 'title')

        video_data = self._download_json(
            'https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s'
            % (video_id, api_token, ','.join(self._video_fields)), title)

        formats = []
        for rendition in video_data['renditions']:
            # Prefer the remote URL; skip renditions that carry no URL at
            # all instead of crashing on a missing key or a None value.
            video_url = rendition.get('remoteUrl') or rendition.get('url')
            if not video_url:
                continue

            protocol = 'rtmp' if video_url.startswith('rtmp') else 'http'
            container = rendition['videoContainer'].lower()
            vcodec = rendition['videoCodec'].lower()

            if protocol == 'rtmp':
                ext = 'flv'
                # Strip the '&mp4:' marker from RTMP URLs.
                # NOTE(review): presumably a play-path separator - confirm.
                video_url = video_url.replace('&mp4:', '')
            else:
                ext = container

            tbr = int_or_none(rendition.get('encodingRate'), 1000)

            formats.append({
                # Label the format with its real protocol rather than a
                # hard-coded 'rtmp' so HTTP renditions are not mislabelled.
                'format_id': '_'.join(
                    [protocol, container, vcodec, '%sk' % tbr]),
                'width': int_or_none(rendition['frameWidth']),
                'height': int_or_none(rendition['frameHeight']),
                'tbr': tbr,
                'filesize': int_or_none(rendition['size']),
                'protocol': protocol,
                'ext': ext,
                'vcodec': vcodec,
                'container': container,
                'url': video_url,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': video_data['referenceId'],
            'title': video_data['name'],
            'description': video_data['shortDescription'] or video_data['longDescription'],
            'formats': formats,
            'thumbnails': [{
                'url': video_data['videoStillURL']
            }, {
                'url': video_data['thumbnailURL']
            }],
            'thumbnail': video_data['videoStillURL'],
            'duration': float_or_none(video_data.get('length'), 1000),
            'timestamp': float_or_none(video_data.get('creationDate'), 1000),
            'uploader': video_data.get('customFields', {}).get('production_company_distributor') or 'TENplay',
            'view_count': int_or_none(video_data.get('playsTotal')),
        }
|
@ -154,7 +154,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
def hex_to_str(hex):
|
||||
return binascii.a2b_hex(hex)
|
||||
|
||||
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
|
||||
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
|
||||
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
|
||||
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
|
@ -5,6 +5,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
InAdvancePagedList,
|
||||
float_or_none,
|
||||
@ -46,6 +47,19 @@ class TudouIE(InfoExtractor):
|
||||
|
||||
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
|
||||
|
||||
# Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
|
||||
# 0001, 0002 and 4001 are not included as they indicate temporary issues
|
||||
TVC_ERRORS = {
|
||||
'0003': 'The video is deleted or does not exist',
|
||||
'1001': 'This video is unavailable due to licensing issues',
|
||||
'1002': 'This video is unavailable as it\'s under review',
|
||||
'1003': 'This video is unavailable as it\'s under review',
|
||||
'3001': 'Password required',
|
||||
'5001': 'This video is available in Mainland China only due to licensing issues',
|
||||
'7001': 'This video is unavailable',
|
||||
'8001': 'This video is unavailable due to licensing issues',
|
||||
}
|
||||
|
||||
def _url_for_id(self, video_id, quality=None):
|
||||
info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
|
||||
if quality:
|
||||
@ -63,6 +77,15 @@ class TudouIE(InfoExtractor):
|
||||
if youku_vcode:
|
||||
return self.url_result('youku:' + youku_vcode, ie='Youku')
|
||||
|
||||
if not item_data.get('itemSegs'):
|
||||
tvc_code = item_data.get('tvcCode')
|
||||
if tvc_code:
|
||||
err_msg = self.TVC_ERRORS.get(tvc_code)
|
||||
if err_msg:
|
||||
raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
|
||||
raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
|
||||
raise ExtractorError('Unxpected error returned from Tudou')
|
||||
|
||||
title = unescapeHTML(item_data['kw'])
|
||||
description = item_data.get('desc')
|
||||
thumbnail_url = item_data.get('pic')
|
||||
|
@ -54,6 +54,16 @@ class UdemyIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_course_info(self, webpage, video_id):
    """Return ``(course_id, course_title)`` scraped from ``webpage``.

    The course object embedded in the page's ``ng-init`` attribute is tried
    first. If it yields no id, the id is searched for directly in the page
    markup. The title comes only from the embedded object and may be None.
    """
    # Fall back to an empty JSON object when the attribute is absent.
    course_json = self._search_regex(
        r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')
    course = self._parse_json(
        unescapeHTML(course_json), video_id, fatal=False)
    if not course:
        course = {}

    course_id = course.get('id')
    if not course_id:
        course_id = self._search_regex(
            (r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
            webpage, 'course id')

    return course_id, course.get('title')
|
||||
|
||||
def _enroll_course(self, base_url, webpage, course_id):
|
||||
def combine_url(base_url, url):
|
||||
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
|
||||
@ -98,7 +108,7 @@ class UdemyIE(InfoExtractor):
|
||||
error_str += ' - %s' % error_data.get('formErrors')
|
||||
raise ExtractorError(error_str, expected=True)
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
def _download_json(self, url_or_request, *args, **kwargs):
|
||||
headers = {
|
||||
'X-Udemy-Snail-Case': 'true',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
@ -116,7 +126,7 @@ class UdemyIE(InfoExtractor):
|
||||
else:
|
||||
url_or_request = sanitized_Request(url_or_request, headers=headers)
|
||||
|
||||
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
|
||||
response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
@ -166,9 +176,7 @@ class UdemyIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, lecture_id)
|
||||
|
||||
course_id = self._search_regex(
|
||||
(r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'),
|
||||
webpage, 'course id')
|
||||
course_id, _ = self._extract_course_info(webpage, lecture_id)
|
||||
|
||||
try:
|
||||
lecture = self._download_lecture(course_id, lecture_id)
|
||||
@ -297,7 +305,7 @@ class UdemyIE(InfoExtractor):
|
||||
|
||||
class UdemyCourseIE(UdemyIE):
|
||||
IE_NAME = 'udemy:course'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?P<id>[\da-z-]+)'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)'
|
||||
_TESTS = []
|
||||
|
||||
@classmethod
|
||||
@ -309,29 +317,29 @@ class UdemyCourseIE(UdemyIE):
|
||||
|
||||
webpage = self._download_webpage(url, course_path)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.udemy.com/api-1.1/courses/%s' % course_path,
|
||||
course_path, 'Downloading course JSON')
|
||||
|
||||
course_id = response['id']
|
||||
course_title = response.get('title')
|
||||
course_id, title = self._extract_course_info(webpage, course_path)
|
||||
|
||||
self._enroll_course(url, webpage, course_id)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
||||
course_id, 'Downloading course curriculum')
|
||||
'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
|
||||
course_id, 'Downloading course curriculum', query={
|
||||
'fields[chapter]': 'title,object_index',
|
||||
'fields[lecture]': 'title',
|
||||
'page_size': '1000',
|
||||
})
|
||||
|
||||
entries = []
|
||||
chapter, chapter_number = None, None
|
||||
for asset in response:
|
||||
asset_type = asset.get('assetType') or asset.get('asset_type')
|
||||
if asset_type == 'Video':
|
||||
asset_id = asset.get('id')
|
||||
if asset_id:
|
||||
chapter, chapter_number = [None] * 2
|
||||
for entry in response['results']:
|
||||
clazz = entry.get('_class')
|
||||
if clazz == 'lecture':
|
||||
lecture_id = entry.get('id')
|
||||
if lecture_id:
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']),
|
||||
'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
|
||||
'title': entry.get('title'),
|
||||
'ie_key': UdemyIE.ie_key(),
|
||||
}
|
||||
if chapter_number:
|
||||
@ -339,8 +347,8 @@ class UdemyCourseIE(UdemyIE):
|
||||
if chapter:
|
||||
entry['chapter'] = chapter
|
||||
entries.append(entry)
|
||||
elif asset.get('type') == 'chapter':
|
||||
chapter_number = asset.get('index') or asset.get('object_index')
|
||||
chapter = asset.get('title')
|
||||
elif clazz == 'chapter':
|
||||
chapter_number = entry.get('object_index')
|
||||
chapter = entry.get('title')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
return self.playlist_result(entries, course_id, title)
|
||||
|
132
youtube_dl/extractor/voxmedia.py
Normal file
132
youtube_dl/extractor/voxmedia.py
Normal file
@ -0,0 +1,132 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class VoxMediaIE(InfoExtractor):
    """Extractor for article pages on theverge.com, vox.com, sbnation.com,
    eater.com, polygon.com, curbed.com and racked.com.

    Every video found on a page is delegated to another extractor through
    a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
        'md5': '73856edf3e89a711e70d5cf7cb280b37',
        'info_dict': {
            'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
            'ext': 'mp4',
            'title': 'Google\'s new material design direction',
            'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
        }
    }, {
        # data-ooyala-id
        'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
        'md5': 'd744484ff127884cd2ba09e3fa604e4b',
        'info_dict': {
            'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B',
            'ext': 'mp4',
            'title': 'The Nexus 6: hands-on with Google\'s phablet',
            'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
        }
    }, {
        # volume embed
        'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
        'md5': '375c483c5080ab8cd85c9c84cfc2d1e4',
        'info_dict': {
            'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
            'ext': 'mp4',
            'title': 'The new frontier of LGBTQ civil rights, explained',
            'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
        }
    }, {
        # youtube embed
        'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
        'md5': '83b3080489fb103941e549352d3e0977',
        'info_dict': {
            'id': 'FcNHTJU1ufM',
            'ext': 'mp4',
            'title': 'How "the robot" became the greatest novelty dance of all time',
            'description': 'md5:b081c0d588b8b2085870cda55e6da176',
            'upload_date': '20160324',
            'uploader_id': 'voxdotcom',
            'uploader': 'Vox',
        }
    }, {
        # SBN.VideoLinkset.entryGroup multiple ooyala embeds
        'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
        'info_dict': {
            'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
            'title': '25 lies you will tell yourself on National Signing Day',
            'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!',
        },
        'playlist': [{
            'md5': '721fededf2ab74ae4176c8c8cbfe092e',
            'info_dict': {
                'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9',
                'ext': 'mp4',
                'title': 'Buddy Hield vs Steph Curry (and the world)',
                'description': 'Let’s dissect only the most important Final Four storylines.',
            },
        }, {
            'md5': 'bf0c5cc115636af028be1bab79217ea9',
            'info_dict': {
                'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj',
                'ext': 'mp4',
                'title': 'Chasing Cinderella 2016: Syracuse basketball',
                'description': 'md5:e02d56b026d51aa32c010676765a690d',
            },
        }],
    }]

    def _real_extract(self, url):
        """Collect every embedded video on the page at *url*.

        Three sources are consulted in order: an inline JSON blob,
        a ``data-ooyala-id`` attribute, and a ``data-volume-uuid``
        attribute (which needs a second page download). Exactly one hit
        is returned as a single entry; any other count is returned as a
        playlist keyed by the display id.
        """
        display_id = self._match_id(url)
        # The page is URL-unquoted as a whole before any pattern is applied.
        page = compat_urllib_parse_unquote(
            self._download_webpage(url, display_id))

        def make_entry(video_id, provider, title=None, description=None):
            # YouTube ids are passed through bare; every other provider is
            # addressed as "<provider>:<id>".
            if provider == 'youtube':
                target = video_id
            else:
                target = '%s:%s' % (provider, video_id)
            return {
                '_type': 'url_transparent',
                'url': target,
                # Fall back to the page's OpenGraph metadata when the
                # embed carries no title/description of its own.
                'title': title or self._og_search_title(page),
                'description': description or self._og_search_description(page),
            }

        entries = []

        # 1) Inline JSON: either a list of videos or a single object.
        inline_json = self._search_regex([
            r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);',
            r'var\s+entry\s*=\s*({.+});',
            r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])',
        ], page, 'video data', default=None)
        if inline_json:
            videos = self._parse_json(inline_json, display_id)
            if isinstance(videos, dict):
                videos = [videos]
            for item in videos:
                video_id = item.get('provider_video_id')
                provider = item.get('provider_video_type')
                if not (video_id and provider):
                    continue
                entries.append(make_entry(
                    video_id, provider,
                    item.get('title'), item.get('description')))

        # 2) Ooyala id carried directly in the markup.
        ooyala_id = self._search_regex(
            r'data-ooyala-id="([^"]+)"', page, 'ooyala id', default=None)
        if ooyala_id:
            entries.append(make_entry(ooyala_id, 'ooyala'))

        # 3) Volume embed: fetch the embed page and read its video object.
        volume_uuid = self._search_regex(
            r'data-volume-uuid="([^"]+)"', page, 'volume uuid', default=None)
        if volume_uuid:
            volume_page = self._download_webpage(
                'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
            volume_json = self._search_regex(
                r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_page, 'video data')
            volume_data = self._parse_json(volume_json, volume_uuid)
            # Only the first provider that has an id is used.
            for provider in ('ooyala', 'youtube'):
                video_id = volume_data.get('%s_id' % provider)
                if not video_id:
                    continue
                description = volume_data.get('description_long') or volume_data.get('description_short')
                entries.append(make_entry(
                    video_id, provider, volume_data.get('title_short'), description))
                break

        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(
            entries, display_id,
            self._og_search_title(page), self._og_search_description(page))
|
@ -778,12 +778,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||
new_req = req_type(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
new_req.timeout = req.timeout
|
||||
req = new_req
|
||||
req = update_Request(req, url=url_escaped)
|
||||
|
||||
for h, v in std_headers.items():
|
||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
||||
@ -1804,6 +1799,20 @@ def update_url_query(url, query):
|
||||
query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Return a new request derived from *req* with selected parts replaced.

    :param req: the request to copy from.
    :param url: replacement URL; when falsy, ``req.get_full_url()`` is used.
    :param data: replacement body; when falsy, ``req.data`` is kept.
    :param headers: extra headers merged over a copy of ``req.headers``
        (``None`` means no extra headers).
    :param query: query parameters passed to ``update_url_query`` together
        with the chosen URL (``None`` means an empty mapping).
    :return: a new ``HEADRequest`` when ``req.get_method()`` is ``'HEAD'``,
        otherwise a new ``compat_urllib_request.Request``. *req* itself is
        not modified.
    """
    # None sentinels replace the former ``{}`` defaults so that no dict
    # object is shared between calls.
    extra_headers = {} if headers is None else headers
    extra_query = {} if query is None else query

    # Work on a copy so the original request's headers stay untouched.
    req_headers = req.headers.copy()
    req_headers.update(extra_headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), extra_query)
    # Keep HEAD requests as HEAD; everything else becomes a plain Request.
    req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    # ``timeout`` is only carried over when the source request has one.
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
if isinstance(key_or_keys, (list, tuple)):
|
||||
for key in key_or_keys:
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.03.27'
|
||||
__version__ = '2016.04.01'
|
||||
|
Loading…
x
Reference in New Issue
Block a user