commit
cf663e548f
@ -108,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||
|
||||
## Filesystem Options:
|
||||
|
@ -86,7 +86,7 @@
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **clipfish**
|
||||
- **Clipfish**
|
||||
- **cliphunter**
|
||||
- **Clipsyndicate**
|
||||
- **Cloudy**
|
||||
@ -116,6 +116,7 @@
|
||||
- **DailymotionCloud**
|
||||
- **daum.net**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
@ -351,7 +352,6 @@
|
||||
- **NowTV**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
@ -377,6 +377,7 @@
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **PBS**
|
||||
- **Periscope**: Periscope
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
@ -406,6 +407,7 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **Quickscope**: Quick Scope
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
@ -518,6 +520,7 @@
|
||||
- **ted**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
@ -621,6 +624,7 @@
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl and ntr.nl
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
|
@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(
|
||||
isinstance(got, list),
|
||||
'Expected field %s to be a list, but it is of type %s' % (
|
||||
isinstance(got, (list, dict)),
|
||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||
info_field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
# Are checkable fields missing from the test case definition?
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in got_dict.items()
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
|
||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
|
@ -136,7 +136,9 @@ def generator(test_case):
|
||||
# We're not using .download here since that is just a shim
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(test_case['url'])
|
||||
res_dict = ydl.extract_info(
|
||||
test_case['url'],
|
||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
|
@ -28,7 +28,6 @@ if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_get_terminal_size,
|
||||
@ -40,7 +39,6 @@ from .compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .utils import (
|
||||
escape_url,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
@ -51,7 +49,6 @@ from .utils import (
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
HEADRequest,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
@ -1860,27 +1857,6 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
    """Start an HTTP download.

    ``req`` is either a URL string or a request object.  The URL is passed
    through ``escape_url`` first; when escaping changes it, the escaped URL
    is used instead (for request objects a new request of the matching type
    is built carrying over data, headers, origin host and the unverifiable
    flag).  The result of ``self._opener.open`` is returned.
    """
    # According to RFC 3986, URLs can not contain non-ASCII characters,
    # however this is not always respected by websites, some tend to give
    # out URLs with non percent-encoded non-ASCII characters (see telemb.py,
    # ard.py [#3412]).  urllib chokes on such URLs (see
    # http://bugs.python.org/issue3991), so the request's original URL is
    # replaced with a percent-encoded one.
    plain_url = isinstance(req, compat_basestring)
    original = req if plain_url else req.get_full_url()
    escaped = escape_url(original)

    if escaped == original:
        # Nothing changed after escaping: open the request untouched.
        target = req
    elif plain_url:
        target = escaped
    else:
        # Preserve HEAD semantics when rebuilding the request object.
        if req.get_method() == 'HEAD':
            factory = HEADRequest
        else:
            factory = compat_urllib_request.Request
        target = factory(
            escaped, data=req.data, headers=req.headers,
            origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(target, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
|
@ -45,11 +45,13 @@ class ExternalFD(FileDownloader):
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||
|
||||
def _source_address(self, command_option):
    """Return the arguments binding the downloader to 'source_address'.

    Kept as a thin wrapper around ``_option`` so callers that still use the
    older helper keep working.  Returns ``[]`` when the parameter is unset,
    otherwise ``[command_option, <source_address>]``.
    """
    return self._option(command_option, 'source_address')


def _option(self, command_option, param):
    """Translate the parameter named ``param`` into command-line arguments.

    Looks ``param`` up in ``self.params`` and returns:
      * ``[]`` when it is missing, ``None`` or ``False``;
      * ``[command_option]`` when it is ``True`` (a plain switch);
      * ``[command_option, value]`` for any other value.
    """
    value = self.params.get(param)
    # A disabled switch (False) must not emit the flag; previously every
    # bool -- including False -- produced [command_option].
    if value is None or value is False:
        return []
    if value is True:
        return [command_option]
    return [command_option, value]
|
||||
|
||||
def _configuration_args(self, default=[]):
|
||||
ex_args = self.params.get('external_downloader_args')
|
||||
@ -77,7 +79,17 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
    """External downloader that builds its command line for ``self.exe``."""

    def _make_cmd(self, tmpfilename, info_dict):
        """Return the argument list used to download ``info_dict['url']``.

        The output goes to ``tmpfilename``; every entry of
        ``info_dict['http_headers']`` is forwarded with ``-H``, followed by
        the user-configured extra arguments and finally the URL after ``--``.
        """
        command = [self.exe, '-o', tmpfilename]
        for name, value in info_dict['http_headers'].items():
            command.extend(['-H', '%s: %s' % (name, value)])
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command
|
||||
@ -88,7 +100,9 @@ class WgetFD(ExternalFD):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--bind-address')
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
@ -105,7 +119,8 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
@ -118,6 +118,7 @@ from .dailymotion import (
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import DCNIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
@ -431,6 +432,10 @@ from .orf import (
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .periscope import (
|
||||
PeriscopeIE,
|
||||
QuickscopeIE,
|
||||
)
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
@ -591,6 +596,7 @@ from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
|
@ -18,6 +18,7 @@ class BreakIE(InfoExtractor):
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
'title': 'When Girls Act Like D-Bags',
|
||||
'age_limit': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
||||
|
@ -1,53 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class ClipfishIE(InfoExtractor):
    """Extractor for clipfish.de video pages.

    All metadata and formats are read from the video web page itself.
    """
    IE_NAME = 'clipfish'

    _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        'md5': '79bc922f3e8a9097b3d68a93780fd475',
        'info_dict': {
            'id': '3966754',
            'ext': 'mp4',
            'title': 'FIFA 14 - E3 2013 Trailer',
            'timestamp': 1370938118,
            'upload_date': '20130611',
            'duration': 82,
        },
        'skip': 'Blocked in the US',
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, thumbnail, duration,
        timestamp) for the clipfish video at ``url``."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The page embeds a JavaScript object literal describing the video;
        # convert it to JSON before parsing.
        video_info = self._parse_json(
            js_to_json(self._html_search_regex(
                r'(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
            video_id)

        formats = []
        for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.append({
                    # The HLS manifest is fetched from the host without the
                    # 'de.' prefix.
                    'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
                    'ext': 'mp4',
                    'format_id': 'hls',
                })
            else:
                formats.append({
                    'url': video_url,
                    'format_id': ext,
                })
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - Video')
        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(video_info.get('length'))
        timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))

        # Only 'formats' is returned (no single 'url'): the per-format URLs
        # above are the download candidates.
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
        }
|
||||
|
@ -18,6 +18,7 @@ from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@ -37,6 +38,9 @@ from ..utils import (
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@ -200,8 +204,8 @@ class InfoExtractor(object):
|
||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "title" and "id" attributes with the same
|
||||
semantics as videos (see above).
|
||||
Additionally, playlists can have "title", "description" and "id" attributes
|
||||
with the same semantics as videos (see above).
|
||||
|
||||
|
||||
_type "multi_video" indicates that there are multiple videos that
|
||||
@ -636,7 +640,7 @@ class InfoExtractor(object):
|
||||
@staticmethod
|
||||
def _meta_regex(prop):
|
||||
return r'''(?isx)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||
(?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1)
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
@ -978,69 +982,210 @@ class InfoExtractor(object):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
# TODO: improve extraction
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
||||
smil = self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
@staticmethod
def _xpath_ns(path, namespace=None):
    """Qualify every tag step of a '/'-separated path with ``namespace``.

    Returns ``path`` unchanged when ``namespace`` is empty.  Otherwise each
    step is rewritten to ``{namespace}step``; empty steps (produced by a
    leading '/' or by '//') and the relative steps '.' and '..' are left
    untouched, since they are navigation markers rather than tag names.
    """
    if not namespace:
        return path
    out = []
    for c in path.split('/'):
        # '..' must stay bare as well: '{ns}..' would be looked up as a
        # tag literally named '..' and never match.
        if not c or c in ('.', '..'):
            out.append(c)
        else:
            out.append('{%s}%s' % (namespace, c))
    return '/'.join(out)
|
||||
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
    """Download the SMIL document at ``smil_url`` and return its formats.

    Returns ``[]`` when the download yields ``False`` (only expected for
    non-fatal downloads); otherwise the namespace is detected and the
    result of ``_parse_smil_formats`` is returned.  ``f4m_params`` is
    forwarded unchanged to ``_parse_smil_formats``.
    """
    smil = self._download_smil(smil_url, video_id, fatal=fatal)

    if smil is False:
        assert not fatal
        return []

    # The base URL is resolved inside _parse_smil_formats (namespace-aware),
    # so no lookup of './head/meta' is done here.
    namespace = self._parse_smil_namespace(smil)

    return self._parse_smil_formats(
        smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
|
||||
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
    """Download a SMIL document and return the info dict parsed from it.

    Returns an empty dict when the download yields ``False``; otherwise
    whatever ``_parse_smil`` produces for the downloaded document.
    """
    document = self._download_smil(smil_url, video_id, fatal=fatal)
    if document is not False:
        return self._parse_smil(document, smil_url, video_id, f4m_params=f4m_params)
    return {}
|
||||
|
||||
def _download_smil(self, smil_url, video_id, fatal=True):
    """Fetch ``smil_url`` via ``_download_xml`` with SMIL-specific messages
    and return its result unchanged."""
    progress_note = 'Downloading SMIL file'
    failure_note = 'Unable to download SMIL file'
    return self._download_xml(
        smil_url, video_id, progress_note, failure_note, fatal=fatal)
|
||||
|
||||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
    """Turn a parsed SMIL document into an info dict.

    Formats and subtitles come from ``_parse_smil_formats`` and
    ``_parse_smil_subtitles``.  The returned 'id' is the base name of
    ``smil_url`` without its extension (the ``video_id`` argument is only
    passed on to the format parser).  Title and description are taken from
    the first matching ``head/meta`` elements; the title falls back to the
    id.
    """
    ns = self._parse_smil_namespace(smil)

    formats = self._parse_smil_formats(
        smil, smil_url, video_id, namespace=ns, f4m_params=f4m_params)
    subtitles = self._parse_smil_subtitles(smil, namespace=ns)

    # From here on the id derived from the SMIL file name is used.
    video_id = os.path.splitext(url_basename(smil_url))[0]

    title = None
    description = None
    for meta in smil.findall(self._xpath_ns('./head/meta', ns)):
        name = meta.attrib.get('name')
        content = meta.attrib.get('content')
        if not (name and content):
            continue
        # Only the first non-empty value of each kind is kept.
        if name == 'title':
            if not title:
                title = content
        elif name in ('description', 'abstract'):
            if not description:
                description = content

    info = {
        'id': video_id,
        'title': title or video_id,
        'description': description,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
|
||||
|
||||
def _parse_smil_namespace(self, smil):
    """Return the namespace captured from the root tag ``{namespace}smil``
    via ``_search_regex``, with ``None`` as the default."""
    root_tag = smil.tag
    return self._search_regex(
        r'(?i)^{([^}]+)?}smil$', root_tag, 'namespace', default=None)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
    """Build the list of format dicts described by a parsed SMIL document.

    Every ``video`` element (at any depth, namespace-aware) that has a
    ``src`` attribute is turned into one or more formats:

      * RTMP, when ``proto`` is 'rtmp' or the streamer/base URL starts
        with 'rtmp';
      * HLS, when ``proto`` or the source extension is 'm3u8' (delegated
        to ``_extract_m3u8_formats``);
      * HDS, when the source extension is 'f4m' (delegated to
        ``_extract_f4m_formats`` with ``f4m_params`` appended as query);
      * plain HTTP otherwise, when the resolved URL starts with 'http'.

    The formats are sorted with ``_sort_formats`` before being returned.
    """
    # The base URL defaults to the SMIL URL itself; the first <meta> in
    # <head> carrying a 'base' or 'httpBase' attribute overrides it.
    base = smil_url
    for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
        b = meta.get('base') or meta.get('httpBase')
        if b:
            base = b
            break

    formats = []
    # Counters used to number formats that carry no bitrate.
    rtmp_count = 0
    http_count = 0

    videos = smil.findall(self._xpath_ns('.//video', namespace))
    for video in videos:
        src = video.get('src')
        if not src:
            continue

        # NOTE(review): the second argument presumably scales bit/s down to
        # kbit/s -- confirm against int_or_none.
        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
        filesize = int_or_none(video.get('size') or video.get('fileSize'))
        width = int_or_none(video.get('width'))
        height = int_or_none(video.get('height'))
        proto = video.get('proto')
        ext = video.get('ext')
        src_ext = determine_ext(src)
        streamer = video.get('streamer') or base

        if proto == 'rtmp' or streamer.startswith('rtmp'):
            rtmp_count += 1
            formats.append({
                'url': streamer,
                'play_path': src,
                'ext': 'flv',
                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                'tbr': bitrate,
                'filesize': filesize,
                'width': width,
                'height': height,
            })
            continue

        # Relative sources are resolved against the base URL.
        src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)

        if proto == 'm3u8' or src_ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                src_url, video_id, ext or 'mp4', m3u8_id='hls'))
            continue

        if src_ext == 'f4m':
            f4m_url = src_url
            if not f4m_params:
                # Default query parameters used when the caller gave none.
                f4m_params = {
                    'hdcore': '3.2.0',
                    'plugin': 'flowplayer-3.2.0.1',
                }
            f4m_url += '&' if '?' in f4m_url else '?'
            f4m_url += compat_urllib_parse.urlencode(f4m_params)
            formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
            continue

        if src_url.startswith('http'):
            http_count += 1
            formats.append({
                'url': src_url,
                'ext': ext or src_ext or 'flv',
                'format_id': 'http-%d' % (bitrate or http_count),
                'tbr': bitrate,
                'filesize': filesize,
                'width': width,
                'height': height,
            })

    self._sort_formats(formats)

    return formats
|
||||
|
||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
    """Convert a single SMIL ``video`` element into format dicts.

    Returns a ``(formats, rtmp_count)`` tuple.  ``rtmp_count`` is
    incremented when an RTMP format is produced and returned unchanged
    otherwise.  When the element has no ``proto`` attribute the protocol is
    guessed from the prefix of ``base`` ('rtmp' or 'http').  Elements
    without ``src``, or whose protocol cannot be determined or is not
    handled, yield an empty format list.
    """
    src = video.get('src')
    if not src:
        return [], rtmp_count
    bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
    width = int_or_none(video.get('width'))
    height = int_or_none(video.get('height'))
    proto = video.get('proto')
    if not proto and base:
        if base.startswith('rtmp'):
            proto = 'rtmp'
        elif base.startswith('http'):
            proto = 'http'
    ext = video.get('ext')
    if proto == 'm3u8':
        return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
    if proto == 'rtmp':
        rtmp_count += 1
        streamer = video.get('streamer') or base
        return ([{
            'url': streamer,
            'play_path': src,
            'ext': 'flv',
            'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
            'tbr': bitrate,
            'width': width,
            'height': height,
        }], rtmp_count)
    # proto may still be None here (no attribute and no usable base).
    if proto and proto.startswith('http'):
        return ([{
            'url': base + src,
            'ext': ext or 'flv',
            'tbr': bitrate,
            'width': width,
            'height': height,
        }], rtmp_count)
    # Unknown protocol: return the same tuple shape so callers can always
    # unpack the result.
    return [], rtmp_count
|
||||
def _parse_smil_subtitles(self, smil, namespace=None):
    """Collect the ``textstream`` elements of a SMIL document.

    Returns a dict mapping the stream language (``systemLanguage`` or,
    failing that, ``systemLanguageName``) to a list of
    ``{'url': ..., 'ext': ...}`` dicts.  Streams without ``src`` are
    ignored.
    """
    by_language = {}
    for stream in smil.findall(self._xpath_ns('.//textstream', namespace)):
        location = stream.get('src')
        if not location:
            continue
        extension = stream.get('ext') or determine_ext(location)
        # Last resort: derive the extension from the declared MIME type.
        if not extension and stream.get('type') == 'text/srt':
            extension = 'srt'
        language = stream.get('systemLanguage') or stream.get('systemLanguageName')
        by_language.setdefault(language, []).append({
            'url': location,
            'ext': extension,
        })
    return by_language
|
||||
|
||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
    """Download an XSPF playlist and return its entries.

    Returns ``[]`` when the download yields ``False``; otherwise the list
    produced by ``_parse_xspf`` for the downloaded document.
    """
    xspf = self._download_xml(
        playlist_url, playlist_id, 'Downloading xspf playlist',
        'Unable to download xspf manifest', fatal=fatal)
    if xspf is False:
        return []
    return self._parse_xspf(xspf, playlist_id)
|
||||
|
||||
def _parse_xspf(self, playlist, playlist_id):
    """Convert a parsed XSPF playlist into a list of entry dicts.

    One entry is produced per ``trackList/track`` element.  Every entry
    uses ``playlist_id`` as its 'id'; the title falls back to
    ``playlist_id`` as well.  Each ``location`` child of a track becomes
    one format, and the formats are sorted with ``_sort_formats``.
    """
    NS_MAP = {
        'xspf': 'http://xspf.org/ns/0/',
        's1': 'http://static.streamone.nl/player/ns/0',
    }

    def qualified(path):
        # Expand the 'xspf:' / 's1:' prefixes using NS_MAP.
        return xpath_with_ns(path, NS_MAP)

    entries = []
    for track in playlist.findall(qualified('./xspf:trackList/xspf:track')):
        title = xpath_text(
            track, qualified('./xspf:title'), 'title', default=playlist_id)
        description = xpath_text(
            track, qualified('./xspf:annotation'), 'description')
        thumbnail = xpath_text(
            track, qualified('./xspf:image'), 'thumbnail')
        raw_duration = xpath_text(
            track, qualified('./xspf:duration'), 'duration')
        duration = float_or_none(raw_duration, 1000)

        formats = []
        for location in track.findall(qualified('./xspf:location')):
            formats.append({
                'url': location.text,
                'format_id': location.get(qualified('s1:label')),
                'width': int_or_none(location.get(qualified('s1:width'))),
                'height': int_or_none(location.get(qualified('s1:height'))),
            })
        self._sort_formats(formats)

        entries.append({
            'id': playlist_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        })
    return entries
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
|
84
youtube_dl/extractor/dcn.py
Normal file
84
youtube_dl/extractor/dcn.py
Normal file
@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
    """Extractor for dcndigital.ae video and show pages.

    Metadata is read from the mangomolo JSON endpoint; the stream URLs are
    scraped from the mangomolo embed page.
    """
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video identified by ``url``."""
        video_id = self._match_id(url)

        # The metadata endpoint is queried with an Origin header naming the
        # DCN site.
        info_request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            headers={'Origin': 'http://www.dcndigital.ae'})
        video = self._download_json(info_request, video_id)

        # Prefer the English title; the Arabic one is mandatory otherwise.
        title = video.get('title_en') or video['title_ar']

        embed_query = compat_urllib_parse.urlencode({
            'id': video['id'],
            'user_id': video['user_id'],
            'signature': video['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
            + embed_query, video_id)

        m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        # An RTSP link is optional; add it as an extra format when present.
        rtsp_url = self._search_regex(
            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        image_path = video.get('img')
        if image_path:
            thumbnail = 'http://admin.mangomolo.com/analytics/%s' % image_path
        else:
            thumbnail = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description_en') or video.get('description_ar'),
            'thumbnail': thumbnail,
            'duration': int_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('create_time') or video.get('update_time'), ' '),
            'formats': formats,
        }
|
@ -34,24 +34,14 @@ class DHMIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist_url = self._search_regex(
|
||||
r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
|
||||
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(
|
||||
'./{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
|
||||
|
||||
video_url = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}location',
|
||||
'video url', fatal=True)
|
||||
thumbnail = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}image',
|
||||
'thumbnail')
|
||||
entries = self._extract_xspf_playlist(playlist_url, playlist_id)
|
||||
|
||||
title = self._search_regex(
|
||||
[r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
|
||||
@ -63,11 +53,10 @@ class DHMIE(InfoExtractor):
|
||||
r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
|
||||
webpage, 'duration', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
entries[0].update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@ -86,7 +86,7 @@ class FC2IE(InfoExtractor):
|
||||
|
||||
info_url = (
|
||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
|
@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,6 +130,89 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
},
|
||||
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
|
||||
{
|
||||
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
|
||||
'info_dict': {
|
||||
'id': 'smil',
|
||||
'ext': 'mp4',
|
||||
'title': 'Automatics, robotics and biocybernetics',
|
||||
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
|
||||
'formats': 'mincount:16',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'force_generic_extractor': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
|
||||
{
|
||||
'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
|
||||
'info_dict': {
|
||||
'id': 'hds',
|
||||
'ext': 'flv',
|
||||
'title': 'hds',
|
||||
'formats': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from https://www.restudy.dk/video/play/id/1637
|
||||
{
|
||||
'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
|
||||
'info_dict': {
|
||||
'id': 'video_1637',
|
||||
'ext': 'flv',
|
||||
'title': 'video_1637',
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
|
||||
{
|
||||
'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
|
||||
'info_dict': {
|
||||
'id': 'smil-service',
|
||||
'ext': 'flv',
|
||||
'title': 'smil-service',
|
||||
'formats': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
|
||||
{
|
||||
'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
|
||||
'info_dict': {
|
||||
'id': '4719370',
|
||||
'ext': 'mp4',
|
||||
'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
|
||||
{
|
||||
'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
|
||||
'info_dict': {
|
||||
'id': 'mZlp2ctYIUEB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tikibad ontruimd wegens brand',
|
||||
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 33,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@ -236,6 +319,19 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
{
|
||||
# ooyala video embedded with http://player.ooyala.com/iframe.js
|
||||
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
|
||||
'info_dict': {
|
||||
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
|
||||
'ext': 'mp4',
|
||||
'title': '"Steve Jobs: Man in the Machine" trailer',
|
||||
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# multiple ooyala embeds on SBN network websites
|
||||
{
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@ -1110,11 +1206,15 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
# Is it an RSS feed, a SMIL file or an XSPF playlist?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@ -1320,7 +1420,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
||||
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
@ -1716,7 +1816,8 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
if determine_ext(video_url) == 'smil':
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': self._extract_smil_formats(video_url, video_id),
|
||||
@ -1724,6 +1825,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
|
@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor):
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
    """Return the hard-coded ``enc_key`` string.

    ``swf_url`` and ``video_id`` are accepted but not used by this body;
    the key is a constant that has to be refreshed by hand.
    """
    # last update at 2015-08-10 for Zombie
    return '3601ba290e4f4662848c710e2122007e'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NowTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
||||
|
||||
_TESTS = [{
|
||||
# rtl
|
||||
@ -127,6 +127,9 @@ class NowTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -407,6 +407,7 @@ class NPORadioFragmentIE(InfoExtractor):
|
||||
|
||||
|
||||
class VPROIE(NPOIE):
|
||||
IE_NAME = 'vpro'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
|
@ -16,15 +16,17 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
|
||||
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
|
||||
'info_dict': {
|
||||
'id': '20079905452',
|
||||
'ext': 'mp4',
|
||||
'title': 'Культура меняет нас (прекрасный ролик!))',
|
||||
'duration': 100,
|
||||
'upload_date': '20141207',
|
||||
'uploader_id': '330537914540',
|
||||
'uploader': 'Виталий Добровольский',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# metadataUrl
|
||||
@ -35,9 +37,11 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Девушка без комплексов ...',
|
||||
'duration': 191,
|
||||
'upload_date': '20150518',
|
||||
'uploader_id': '534380003155',
|
||||
'uploader': 'Андрей Мещанинов',
|
||||
'uploader': '☭ Андрей Мещанинов ☭',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||
|
@ -92,6 +92,7 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 3172,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140122',
|
||||
'age_limit': 10,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
|
99
youtube_dl/extractor/periscope.py
Normal file
99
youtube_dl/extractor/periscope.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class PeriscopeIE(InfoExtractor):
    """Information extractor for periscope.tv broadcast pages (``/w/<token>``)."""

    IE_DESC = 'Periscope'
    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
        'md5': '65b57957972e503fcbbaeed8f4fa04ca',
        'info_dict': {
            'id': '56102209',
            'ext': 'mp4',
            'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
            'timestamp': 1438978559,
            'upload_date': '20150807',
            'uploader': 'Bec Boop',
            'uploader_id': '1465763',
        },
        'skip': 'Expires in 24 hours',
    }

    def _call_api(self, method, token):
        """Download and return the JSON answer of API ``method`` for ``token``.

        The request goes to ``https://api.periscope.tv/api/v2/<method>`` with
        the token passed as the ``token`` query parameter; the token is also
        handed to ``_download_json`` as its second argument.
        """
        return self._download_json(
            'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token)

    def _real_extract(self, url):
        """Build the info dict for the broadcast whose token is in ``url``.

        Two API calls are made: ``getBroadcastPublic`` for the metadata and
        ``getAccessPublic`` for the stream URLs.  A ``KeyError`` is raised if
        the metadata lacks ``broadcast`` or ``broadcast['status']``.
        """
        token = self._match_id(url)

        broadcast_data = self._call_api('getBroadcastPublic', token)
        broadcast = broadcast_data['broadcast']
        status = broadcast['status']

        # 'user' may be absent or null; fall back to an empty mapping so the
        # lookups below never hit None.
        user = broadcast_data.get('user') or {}
        uploader = broadcast.get('user_display_name') or user.get('display_name')
        uploader_id = broadcast.get('user_id') or user.get('id')

        title = '%s - %s' % (uploader, status) if uploader else status
        # A missing or null 'state' is treated as an empty string instead of
        # crashing on None.lower().
        state = (broadcast.get('state') or '').lower()
        if state == 'running':
            title = self._live_title(title)
        timestamp = parse_iso8601(broadcast.get('created_at'))

        thumbnails = [{
            'url': broadcast[image],
        } for image in ('image_url', 'image_url_small') if broadcast.get(image)]

        stream = self._call_api('getAccessPublic', token)

        formats = []
        for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
            video_url = stream.get(format_id + '_url')
            if not video_url:
                continue
            f = {
                'url': video_url,
                'ext': 'flv' if format_id == 'rtmp' else 'mp4',
            }
            if format_id != 'rtmp':
                # Every non-rtmp URL is tagged as HLS; the native downloader
                # is selected only once the broadcast has ended.
                f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8'
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': broadcast.get('id') or token,
            'title': title,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class QuickscopeIE(InfoExtractor):
    """Resolve watchonperiscope.com broadcast pages to their Periscope URL."""

    IE_DESC = 'Quick Scope'
    _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
    _TEST = {
        'url': 'https://watchonperiscope.com/broadcast/56180087',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """POST the broadcast id to ``accessChannel`` and delegate to Periscope.

        The JSON answer must contain ``share_url``; that URL is returned as a
        ``url_result`` for the 'Periscope' extractor.
        """
        broadcast_id = self._match_id(url)

        # Form-encoded POST body, sent as UTF-8 bytes.
        post_data = compat_urllib_parse.urlencode({
            'broadcast_id': broadcast_id,
            'entry_ticket': '',
            'from_push': 'false',
            'uses_sessions': 'true',
        }).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://watchonperiscope.com/api/accessChannel', post_data)

        access = self._download_json(request, broadcast_id)
        return self.url_result(access['share_url'], 'Periscope')
|
@ -22,6 +22,7 @@ class Porn91IE(InfoExtractor):
|
||||
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
||||
'ext': 'mp4',
|
||||
'duration': 431,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,4 +69,5 @@ class Porn91IE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
||||
|
@ -30,6 +30,7 @@ class RutubeIE(InfoExtractor):
|
||||
'uploader': 'NTDRussian',
|
||||
'uploader_id': '29790',
|
||||
'upload_date': '20131016',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# It requires ffmpeg (m3u8 download)
|
||||
|
@ -29,6 +29,7 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
||||
|
@ -45,6 +45,14 @@ class SouthParkDeIE(SouthParkIE):
|
||||
'title': 'The Government Won\'t Respect My Privacy',
|
||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||
},
|
||||
}, {
|
||||
# non-ASCII characters in initial URL
|
||||
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
# non-ASCII characters in redirect URL
|
||||
'url': 'http://www.southpark.de/alle-episoden/s18e09',
|
||||
'playlist_count': 4,
|
||||
}]
|
||||
|
||||
|
||||
|
35
youtube_dl/extractor/telegraaf.py
Normal file
35
youtube_dl/extractor/telegraaf.py
Normal file
@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class TelegraafIE(InfoExtractor):
    """Information extractor for telegraaf.nl ``/tv/`` video pages."""

    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
    _TEST = {
        'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
        'info_dict': {
            'id': '24353229',
            'ext': 'mp4',
            'title': 'Tikibad ontruimd wegens brand',
            'description': 'md5:05ca046ff47b931f9b04855015e163a4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 33,
        },
    }

    def _real_extract(self, url):
        """Return a playlist built from the XSPF playlist the page points to.

        The playlist URL is the first argument of the page's
        ``iframe.loadPlayer('...')`` call; title and description come from
        the page's Open Graph data, with a trailing ' - VIDEO' stripped from
        the title.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        playlist_url = self._search_regex(
            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')

        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
        title = remove_end(self._og_search_title(webpage), ' - VIDEO')
        description = self._og_search_description(webpage)

        return self.playlist_result(entries, playlist_id, title, description)
|
@ -104,6 +104,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'duration': 1492,
|
||||
'timestamp': 1330522854,
|
||||
'upload_date': '20120229',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
|
@ -13,7 +13,7 @@ class TweakersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
||||
'md5': '1b5afa817403bb5baa08359dca31e6df',
|
||||
'md5': '3147e4ddad366f97476a93863e4557c8',
|
||||
'info_dict': {
|
||||
'id': '9926',
|
||||
'ext': 'mp4',
|
||||
@ -25,41 +25,7 @@ class TweakersIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
playlist = self._download_xml(
|
||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
|
||||
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
|
||||
description = xpath_text(
|
||||
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
|
||||
thumbnail = xpath_text(
|
||||
track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
|
||||
1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
playlist_id = self._match_id(url)
|
||||
entries = self._extract_xspf_playlist(
|
||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id)
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VideoLecturesNetIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
|
||||
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
IE_NAME = 'videolectures.net'
|
||||
|
||||
_TEST = {
|
||||
|
@ -29,6 +29,7 @@ from ..utils import (
|
||||
class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
_LOGIN_REQUIRED = False
|
||||
_LOGIN_URL = 'https://vimeo.com/log_in'
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@ -37,21 +38,25 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://vimeo.com/log_in'
|
||||
webpage = self._download_webpage(login_url, None, False)
|
||||
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
|
||||
webpage = self._download_webpage(self._LOGIN_URL, None, False)
|
||||
token = self._extract_xsrft(webpage)
|
||||
data = urlencode_postdata({
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'action': 'login',
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
login_request.add_header('Referer', self._LOGIN_URL)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _extract_xsrft(self, webpage):
    """Return the ``xsrft`` token found in ``webpage``."""
    # Matches `xsrft`, then `=` or `:`, then a single- or double-quoted
    # value; the closing quote must be the same character as the opening one.
    xsrft_re = r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)'
    return self._search_regex(
        xsrft_re, webpage, 'login token', group='xsrft')
|
||||
|
||||
|
||||
class VimeoIE(VimeoBaseInfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
@ -193,7 +198,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
token = self._extract_xsrft(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
@ -203,7 +208,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
password_request.add_header('Referer', url)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
@ -422,10 +427,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class VimeoChannelIE(InfoExtractor):
|
||||
class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vimeo:channel'
|
||||
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
_TITLE = None
|
||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/channels/tributes',
|
||||
@ -440,7 +446,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
return '%s/videos/page:%d/' % (base_url, pagenum)
|
||||
|
||||
def _extract_list_title(self, webpage):
|
||||
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||
return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||
|
||||
def _login_list_password(self, page_url, list_id, webpage):
|
||||
login_form = self._search_regex(
|
||||
@ -453,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
if password is None:
|
||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||
fields = self._hidden_inputs(login_form)
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
token = self._extract_xsrft(webpage)
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
post = urlencode_postdata(fields)
|
||||
@ -499,7 +505,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
|
||||
class VimeoUserIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:user'
|
||||
_VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/nkistudio/videos',
|
||||
@ -603,14 +609,14 @@ class VimeoReviewIE(InfoExtractor):
|
||||
return self.url_result(player_url, 'Vimeo', video_id)
|
||||
|
||||
|
||||
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
class VimeoWatchLaterIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:watchlater'
|
||||
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
|
||||
_VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
|
||||
_VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
|
||||
_TITLE = 'Watch Later'
|
||||
_LOGIN_REQUIRED = True
|
||||
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/home/watchlater',
|
||||
'url': 'https://vimeo.com/watchlater',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -626,7 +632,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
return request
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
|
||||
|
||||
|
||||
class VimeoLikesIE(InfoExtractor):
|
||||
|
@ -213,7 +213,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
(?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
v=
|
||||
)
|
||||
))
|
||||
@ -365,6 +365,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||
'uploader': 'justintimberlakeVEVO',
|
||||
'uploader_id': 'justintimberlakeVEVO',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -380,6 +381,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'setindia'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
|
||||
'note': 'Use the first video ID in the URL',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'phihag',
|
||||
'upload_date': '20121002',
|
||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': ['youtube-dl'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
||||
'note': '256k DASH audio (format 141) via DASH manifest',
|
||||
@ -421,7 +442,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': 'nfWlot6h_JM',
|
||||
'ext': 'm4a',
|
||||
'title': 'Taylor Swift - Shake It Off',
|
||||
'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
|
||||
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
|
||||
'uploader': 'TaylorSwiftVEVO',
|
||||
'uploader_id': 'TaylorSwiftVEVO',
|
||||
'upload_date': '20140818',
|
||||
@ -455,6 +476,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'The Witcher',
|
||||
'uploader_id': 'WitcherGame',
|
||||
'upload_date': '20140605',
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
# Age-gate video with encrypted signature
|
||||
@ -468,6 +490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'LloydVEVO',
|
||||
'uploader_id': 'LloydVEVO',
|
||||
'upload_date': '20110629',
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
||||
@ -492,7 +515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'lqQg6PlCWgI',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120731',
|
||||
'upload_date': '20120724',
|
||||
'uploader_id': 'olympic',
|
||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||
'uploader': 'Olympics',
|
||||
@ -521,7 +544,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'qEJwOuvDf7I',
|
||||
'info_dict': {
|
||||
'id': 'qEJwOuvDf7I',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
|
||||
'description': '',
|
||||
'upload_date': '20150404',
|
||||
|
@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
return ret
|
||||
|
||||
def http_request(self, req):
|
||||
# According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
|
||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||
# To work around the aforementioned issue we will replace the request's original URL with a
|
||||
# percent-encoded one
|
||||
# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
|
||||
# the code of this workaround has been moved here from YoutubeDL.urlopen()
|
||||
url = req.get_full_url()
|
||||
url_escaped = escape_url(url)
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||
new_req = req_type(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
new_req.timeout = req.timeout
|
||||
req = new_req
|
||||
|
||||
for h, v in std_headers.items():
|
||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
||||
# The dict keys are capitalized because of this bug by urllib
|
||||
@ -695,6 +715,17 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
gz = io.BytesIO(self.deflate(resp.read()))
|
||||
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||
resp.msg = old_resp.msg
|
||||
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
|
||||
if 300 <= resp.code < 400:
|
||||
location = resp.headers.get('Location')
|
||||
if location:
|
||||
# Per RFC 2616 the default charset is iso-8859-1, which Python 3 respects
|
||||
if sys.version_info >= (3, 0):
|
||||
location = location.encode('iso-8859-1').decode('utf-8')
|
||||
location_escaped = escape_url(location)
|
||||
if location != location_escaped:
|
||||
del resp.headers['Location']
|
||||
resp.headers['Location'] = location_escaped
|
||||
return resp
|
||||
|
||||
https_request = http_request
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.07.28'
|
||||
__version__ = '2015.08.09'
|
||||
|
Loading…
x
Reference in New Issue
Block a user