commit
cf663e548f
@ -108,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--playlist-reverse Download playlist videos in reverse order
|
--playlist-reverse Download playlist videos in reverse order
|
||||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
|
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget
|
||||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||||
|
|
||||||
## Filesystem Options:
|
## Filesystem Options:
|
||||||
|
@ -86,7 +86,7 @@
|
|||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
- **Cinemassacre**
|
- **Cinemassacre**
|
||||||
- **clipfish**
|
- **Clipfish**
|
||||||
- **cliphunter**
|
- **cliphunter**
|
||||||
- **Clipsyndicate**
|
- **Clipsyndicate**
|
||||||
- **Cloudy**
|
- **Cloudy**
|
||||||
@ -116,6 +116,7 @@
|
|||||||
- **DailymotionCloud**
|
- **DailymotionCloud**
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **DBTV**
|
- **DBTV**
|
||||||
|
- **DCN**
|
||||||
- **DctpTv**
|
- **DctpTv**
|
||||||
- **DeezerPlaylist**
|
- **DeezerPlaylist**
|
||||||
- **defense.gouv.fr**
|
- **defense.gouv.fr**
|
||||||
@ -351,7 +352,6 @@
|
|||||||
- **NowTV**
|
- **NowTV**
|
||||||
- **nowvideo**: NowVideo
|
- **nowvideo**: NowVideo
|
||||||
- **npo**: npo.nl and ntr.nl
|
- **npo**: npo.nl and ntr.nl
|
||||||
- **npo**: npo.nl and ntr.nl
|
|
||||||
- **npo.nl:live**
|
- **npo.nl:live**
|
||||||
- **npo.nl:radio**
|
- **npo.nl:radio**
|
||||||
- **npo.nl:radio:fragment**
|
- **npo.nl:radio:fragment**
|
||||||
@ -377,6 +377,7 @@
|
|||||||
- **parliamentlive.tv**: UK parliament videos
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
- **Patreon**
|
- **Patreon**
|
||||||
- **PBS**
|
- **PBS**
|
||||||
|
- **Periscope**: Periscope
|
||||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||||
- **Phoenix**
|
- **Phoenix**
|
||||||
- **Photobucket**
|
- **Photobucket**
|
||||||
@ -406,6 +407,7 @@
|
|||||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||||
|
- **Quickscope**: Quick Scope
|
||||||
- **QuickVid**
|
- **QuickVid**
|
||||||
- **R7**
|
- **R7**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
@ -518,6 +520,7 @@
|
|||||||
- **ted**
|
- **ted**
|
||||||
- **TeleBruxelles**
|
- **TeleBruxelles**
|
||||||
- **telecinco.es**
|
- **telecinco.es**
|
||||||
|
- **Telegraaf**
|
||||||
- **TeleMB**
|
- **TeleMB**
|
||||||
- **TeleTask**
|
- **TeleTask**
|
||||||
- **TenPlay**
|
- **TenPlay**
|
||||||
@ -621,6 +624,7 @@
|
|||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
|
- **vpro**: npo.nl and ntr.nl
|
||||||
- **VRT**
|
- **VRT**
|
||||||
- **vube**: Vube.com
|
- **vube**: Vube.com
|
||||||
- **VuClip**
|
- **VuClip**
|
||||||
|
@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
isinstance(got, list),
|
isinstance(got, (list, dict)),
|
||||||
'Expected field %s to be a list, but it is of type %s' % (
|
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||||
info_field, type(got).__name__))
|
info_field, type(got).__name__))
|
||||||
expected_num = int(expected.partition(':')[2])
|
expected_num = int(expected.partition(':')[2])
|
||||||
assertGreaterEqual(
|
assertGreaterEqual(
|
||||||
@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
# Are checkable fields missing from the test case definition?
|
# Are checkable fields missing from the test case definition?
|
||||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||||
for key, value in got_dict.items()
|
for key, value in got_dict.items()
|
||||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
|
||||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||||
if missing_keys:
|
if missing_keys:
|
||||||
def _repr(v):
|
def _repr(v):
|
||||||
|
@ -136,7 +136,9 @@ def generator(test_case):
|
|||||||
# We're not using .download here since that is just a shim
|
# We're not using .download here since that is just a shim
|
||||||
# for outside error handling, and returns the exit code
|
# for outside error handling, and returns the exit code
|
||||||
# instead of the result dict.
|
# instead of the result dict.
|
||||||
res_dict = ydl.extract_info(test_case['url'])
|
res_dict = ydl.extract_info(
|
||||||
|
test_case['url'],
|
||||||
|
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||||
|
@ -28,7 +28,6 @@ if os.name == 'nt':
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
@ -40,7 +39,6 @@ from .compat import (
|
|||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
escape_url,
|
|
||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
DateRange,
|
DateRange,
|
||||||
@ -51,7 +49,6 @@ from .utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
HEADRequest,
|
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
@ -1860,27 +1857,6 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def urlopen(self, req):
|
def urlopen(self, req):
|
||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
|
|
||||||
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
|
||||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
|
||||||
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
|
||||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
|
||||||
# To work around aforementioned issue we will replace request's original URL with
|
|
||||||
# percent-encoded one
|
|
||||||
req_is_string = isinstance(req, compat_basestring)
|
|
||||||
url = req if req_is_string else req.get_full_url()
|
|
||||||
url_escaped = escape_url(url)
|
|
||||||
|
|
||||||
# Substitute URL if any change after escaping
|
|
||||||
if url != url_escaped:
|
|
||||||
if req_is_string:
|
|
||||||
req = url_escaped
|
|
||||||
else:
|
|
||||||
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
|
||||||
req = req_type(
|
|
||||||
url_escaped, data=req.data, headers=req.headers,
|
|
||||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
|
||||||
|
|
||||||
return self._opener.open(req, timeout=self._socket_timeout)
|
return self._opener.open(req, timeout=self._socket_timeout)
|
||||||
|
|
||||||
def print_debug_header(self):
|
def print_debug_header(self):
|
||||||
|
@ -45,11 +45,13 @@ class ExternalFD(FileDownloader):
|
|||||||
def supports(cls, info_dict):
|
def supports(cls, info_dict):
|
||||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||||
|
|
||||||
def _source_address(self, command_option):
|
def _option(self, command_option, param):
|
||||||
source_address = self.params.get('source_address')
|
param = self.params.get(param)
|
||||||
if source_address is None:
|
if param is None:
|
||||||
return []
|
return []
|
||||||
return [command_option, source_address]
|
if isinstance(param, bool):
|
||||||
|
return [command_option]
|
||||||
|
return [command_option, param]
|
||||||
|
|
||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
ex_args = self.params.get('external_downloader_args')
|
ex_args = self.params.get('external_downloader_args')
|
||||||
@ -77,7 +79,17 @@ class CurlFD(ExternalFD):
|
|||||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._source_address('--interface')
|
cmd += self._option('--interface', 'source_address')
|
||||||
|
cmd += self._configuration_args()
|
||||||
|
cmd += ['--', info_dict['url']]
|
||||||
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
class AxelFD(ExternalFD):
|
||||||
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
|
cmd = [self.exe, '-o', tmpfilename]
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['-H', '%s: %s' % (key, val)]
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@ -88,7 +100,9 @@ class WgetFD(ExternalFD):
|
|||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._source_address('--bind-address')
|
cmd += self._option('--bind-address', 'source_address')
|
||||||
|
cmd += self._option('--proxy', 'proxy')
|
||||||
|
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@ -105,7 +119,8 @@ class Aria2cFD(ExternalFD):
|
|||||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._source_address('--interface')
|
cmd += self._option('--interface', 'source_address')
|
||||||
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
@ -118,6 +118,7 @@ from .dailymotion import (
|
|||||||
)
|
)
|
||||||
from .daum import DaumIE
|
from .daum import DaumIE
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
|
from .dcn import DCNIE
|
||||||
from .dctp import DctpTvIE
|
from .dctp import DctpTvIE
|
||||||
from .deezer import DeezerPlaylistIE
|
from .deezer import DeezerPlaylistIE
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
@ -431,6 +432,10 @@ from .orf import (
|
|||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
|
from .periscope import (
|
||||||
|
PeriscopeIE,
|
||||||
|
QuickscopeIE,
|
||||||
|
)
|
||||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
@ -591,6 +596,7 @@ from .techtalks import TechTalksIE
|
|||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .telebruxelles import TeleBruxellesIE
|
from .telebruxelles import TeleBruxellesIE
|
||||||
from .telecinco import TelecincoIE
|
from .telecinco import TelecincoIE
|
||||||
|
from .telegraaf import TelegraafIE
|
||||||
from .telemb import TeleMBIE
|
from .telemb import TeleMBIE
|
||||||
from .teletask import TeleTaskIE
|
from .teletask import TeleTaskIE
|
||||||
from .tenplay import TenPlayIE
|
from .tenplay import TenPlayIE
|
||||||
|
@ -18,6 +18,7 @@ class BreakIE(InfoExtractor):
|
|||||||
'id': '2468056',
|
'id': '2468056',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'When Girls Act Like D-Bags',
|
'title': 'When Girls Act Like D-Bags',
|
||||||
|
'age_limit': 13,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
||||||
|
@ -1,53 +1,68 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
determine_ext,
|
||||||
parse_duration,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_iso8601,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ClipfishIE(InfoExtractor):
|
class ClipfishIE(InfoExtractor):
|
||||||
IE_NAME = 'clipfish'
|
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||||
'md5': '2521cd644e862936cf2e698206e47385',
|
'md5': '79bc922f3e8a9097b3d68a93780fd475',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3966754',
|
'id': '3966754',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||||
|
'timestamp': 1370938118,
|
||||||
|
'upload_date': '20130611',
|
||||||
'duration': 82,
|
'duration': 82,
|
||||||
},
|
}
|
||||||
'skip': 'Blocked in the US'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
webpage = self._download_webpage(url, video_id)
|
||||||
(video_id, int(time.time())))
|
|
||||||
doc = self._download_xml(
|
video_info = self._parse_json(
|
||||||
info_url, video_id, note='Downloading info page')
|
js_to_json(self._html_search_regex(
|
||||||
title = doc.find('title').text
|
'(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
|
||||||
video_url = doc.find('filename').text
|
video_id)
|
||||||
if video_url is None:
|
|
||||||
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
formats = []
|
||||||
raise ExtractorError('Cannot find video URL in document %r' %
|
for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
|
||||||
xml_bytes)
|
ext = determine_ext(video_url)
|
||||||
thumbnail = doc.find('imageurl').text
|
if ext == 'm3u8':
|
||||||
duration = parse_duration(doc.find('duration').text)
|
formats.append({
|
||||||
|
'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'hls',
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': ext,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' - Video')
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
duration = int_or_none(video_info.get('length'))
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@ from ..compat import (
|
|||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@ -37,6 +38,9 @@ from ..utils import (
|
|||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_basename,
|
||||||
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -200,8 +204,8 @@ class InfoExtractor(object):
|
|||||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||||
object, each element of which is a valid dictionary by this specification.
|
object, each element of which is a valid dictionary by this specification.
|
||||||
|
|
||||||
Additionally, playlists can have "title" and "id" attributes with the same
|
Additionally, playlists can have "title", "description" and "id" attributes
|
||||||
semantics as videos (see above).
|
with the same semantics as videos (see above).
|
||||||
|
|
||||||
|
|
||||||
_type "multi_video" indicates that there are multiple videos that
|
_type "multi_video" indicates that there are multiple videos that
|
||||||
@ -636,7 +640,7 @@ class InfoExtractor(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _meta_regex(prop):
|
def _meta_regex(prop):
|
||||||
return r'''(?isx)<meta
|
return r'''(?isx)<meta
|
||||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
(?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1)
|
||||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||||
|
|
||||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||||
@ -978,69 +982,210 @@ class InfoExtractor(object):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
# TODO: improve extraction
|
@staticmethod
|
||||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
def _xpath_ns(path, namespace=None):
|
||||||
smil = self._download_xml(
|
if not namespace:
|
||||||
smil_url, video_id, 'Downloading SMIL file',
|
return path
|
||||||
'Unable to download SMIL file', fatal=fatal)
|
out = []
|
||||||
|
for c in path.split('/'):
|
||||||
|
if not c or c == '.':
|
||||||
|
out.append(c)
|
||||||
|
else:
|
||||||
|
out.append('{%s}%s' % (namespace, c))
|
||||||
|
return '/'.join(out)
|
||||||
|
|
||||||
|
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||||
|
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||||
|
|
||||||
if smil is False:
|
if smil is False:
|
||||||
assert not fatal
|
assert not fatal
|
||||||
return []
|
return []
|
||||||
|
|
||||||
base = smil.find('./head/meta').get('base')
|
namespace = self._parse_smil_namespace(smil)
|
||||||
|
|
||||||
|
return self._parse_smil_formats(
|
||||||
|
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||||
|
|
||||||
|
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||||
|
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||||
|
if smil is False:
|
||||||
|
return {}
|
||||||
|
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||||
|
|
||||||
|
def _download_smil(self, smil_url, video_id, fatal=True):
|
||||||
|
return self._download_xml(
|
||||||
|
smil_url, video_id, 'Downloading SMIL file',
|
||||||
|
'Unable to download SMIL file', fatal=fatal)
|
||||||
|
|
||||||
|
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
    """Build an info dict (id, title, description, formats, subtitles)
    from an already parsed SMIL tree.

    Formats and subtitles come from self._parse_smil_formats and
    self._parse_smil_subtitles.  Title and description are taken from the
    first ./head/meta elements named 'title' and 'description'/'abstract'
    that carry a non-empty content attribute.
    """
    ns = self._parse_smil_namespace(smil)

    formats = self._parse_smil_formats(
        smil, smil_url, video_id, namespace=ns, f4m_params=f4m_params)
    subtitles = self._parse_smil_subtitles(smil, namespace=ns)

    # From here on the id is derived from the SMIL URL's basename (without
    # extension); the video_id argument was only used for the calls above.
    video_id = os.path.splitext(url_basename(smil_url))[0]

    title = description = None
    for meta in smil.findall(self._xpath_ns('./head/meta', ns)):
        attrs = meta.attrib
        name, content = attrs.get('name'), attrs.get('content')
        if not (name and content):
            continue
        if name == 'title':
            # Only the first usable title wins.
            if not title:
                title = content
        elif name in ('description', 'abstract'):
            # Only the first usable description/abstract wins.
            if not description:
                description = content

    info = {
        'id': video_id,
        'title': title or video_id,
        'description': description,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
|
||||||
|
|
||||||
|
def _parse_smil_namespace(self, smil):
|
||||||
|
return self._search_regex(
|
||||||
|
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||||
|
|
||||||
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
|
||||||
|
base = smil_url
|
||||||
|
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||||
|
b = meta.get('base') or meta.get('httpBase')
|
||||||
|
if b:
|
||||||
|
base = b
|
||||||
|
break
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
rtmp_count = 0
|
rtmp_count = 0
|
||||||
if smil.findall('./body/seq/video'):
|
http_count = 0
|
||||||
video = smil.findall('./body/seq/video')[0]
|
|
||||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
|
||||||
formats.extend(fmts)
|
|
||||||
else:
|
|
||||||
for video in smil.findall('./body/switch/video'):
|
|
||||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
|
||||||
formats.extend(fmts)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||||
|
for video in videos:
|
||||||
return formats
|
|
||||||
|
|
||||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
|
||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
return [], rtmp_count
|
continue
|
||||||
|
|
||||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
|
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||||
width = int_or_none(video.get('width'))
|
width = int_or_none(video.get('width'))
|
||||||
height = int_or_none(video.get('height'))
|
height = int_or_none(video.get('height'))
|
||||||
proto = video.get('proto')
|
proto = video.get('proto')
|
||||||
if not proto:
|
|
||||||
if base:
|
|
||||||
if base.startswith('rtmp'):
|
|
||||||
proto = 'rtmp'
|
|
||||||
elif base.startswith('http'):
|
|
||||||
proto = 'http'
|
|
||||||
ext = video.get('ext')
|
ext = video.get('ext')
|
||||||
if proto == 'm3u8':
|
src_ext = determine_ext(src)
|
||||||
return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
|
|
||||||
elif proto == 'rtmp':
|
|
||||||
rtmp_count += 1
|
|
||||||
streamer = video.get('streamer') or base
|
streamer = video.get('streamer') or base
|
||||||
return ([{
|
|
||||||
|
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
||||||
|
rtmp_count += 1
|
||||||
|
formats.append({
|
||||||
'url': streamer,
|
'url': streamer,
|
||||||
'play_path': src,
|
'play_path': src,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||||
'tbr': bitrate,
|
'tbr': bitrate,
|
||||||
|
'filesize': filesize,
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
}], rtmp_count)
|
})
|
||||||
elif proto.startswith('http'):
|
continue
|
||||||
return ([{
|
|
||||||
'url': base + src,
|
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||||
'ext': ext or 'flv',
|
|
||||||
|
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src_url, video_id, ext or 'mp4', m3u8_id='hls'))
|
||||||
|
continue
|
||||||
|
|
||||||
|
if src_ext == 'f4m':
|
||||||
|
f4m_url = src_url
|
||||||
|
if not f4m_params:
|
||||||
|
f4m_params = {
|
||||||
|
'hdcore': '3.2.0',
|
||||||
|
'plugin': 'flowplayer-3.2.0.1',
|
||||||
|
}
|
||||||
|
f4m_url += '&' if '?' in f4m_url else '?'
|
||||||
|
f4m_url += compat_urllib_parse.urlencode(f4m_params)
|
||||||
|
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
|
||||||
|
continue
|
||||||
|
|
||||||
|
if src_url.startswith('http'):
|
||||||
|
http_count += 1
|
||||||
|
formats.append({
|
||||||
|
'url': src_url,
|
||||||
|
'ext': ext or src_ext or 'flv',
|
||||||
|
'format_id': 'http-%d' % (bitrate or http_count),
|
||||||
'tbr': bitrate,
|
'tbr': bitrate,
|
||||||
|
'filesize': filesize,
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
}], rtmp_count)
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _parse_smil_subtitles(self, smil, namespace=None):
|
||||||
|
subtitles = {}
|
||||||
|
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||||
|
src = textstream.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
ext = textstream.get('ext') or determine_ext(src)
|
||||||
|
if not ext:
|
||||||
|
type_ = textstream.get('type')
|
||||||
|
if type_ == 'text/srt':
|
||||||
|
ext = 'srt'
|
||||||
|
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': src,
|
||||||
|
'ext': ext,
|
||||||
|
})
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
||||||
|
xspf = self._download_xml(
|
||||||
|
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
||||||
|
'Unable to download xspf manifest', fatal=fatal)
|
||||||
|
if xspf is False:
|
||||||
|
return []
|
||||||
|
return self._parse_xspf(xspf, playlist_id)
|
||||||
|
|
||||||
|
def _parse_xspf(self, playlist, playlist_id):
    """Turn a parsed XSPF playlist element into a list of entry dicts.

    One entry is produced per xspf:track under xspf:trackList.  Every
    entry uses playlist_id as its id; the track title also falls back to
    playlist_id.  Each xspf:location of a track becomes one format, with
    label/width/height read from s1-namespaced attributes.
    """
    NS_MAP = {
        'xspf': 'http://xspf.org/ns/0/',
        's1': 'http://static.streamone.nl/player/ns/0',
    }

    def qualified(path):
        # Resolve the xspf:/s1: prefixes in path against NS_MAP.
        return xpath_with_ns(path, NS_MAP)

    entries = []
    for track in playlist.findall(qualified('./xspf:trackList/xspf:track')):
        title = xpath_text(
            track, qualified('./xspf:title'), 'title', default=playlist_id)
        description = xpath_text(
            track, qualified('./xspf:annotation'), 'description')
        thumbnail = xpath_text(
            track, qualified('./xspf:image'), 'thumbnail')
        duration_text = xpath_text(
            track, qualified('./xspf:duration'), 'duration')
        # presumably converts milliseconds to seconds - confirm against
        # float_or_none's second parameter
        duration = float_or_none(duration_text, 1000)

        formats = []
        for location in track.findall(qualified('./xspf:location')):
            formats.append({
                'url': location.text,
                'format_id': location.get(qualified('s1:label')),
                'width': int_or_none(location.get(qualified('s1:width'))),
                'height': int_or_none(location.get(qualified('s1:height'))),
            })
        self._sort_formats(formats)

        entry = {
            'id': playlist_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
        entries.append(entry)
    return entries
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
|
84
youtube_dl/extractor/dcn.py
Normal file
84
youtube_dl/extractor/dcn.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DCNIE(InfoExtractor):
    """Extractor for dcndigital.ae video and show pages.

    Metadata is read from a JSON endpoint on admin.mangomolo.com; the
    stream URLs are scraped from the matching embed page on the same host.
    """
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            # Raw string: '\.' is not a valid escape sequence in a plain
            # string literal (the resulting value is unchanged).
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video identified by url.

        Raises KeyError if the metadata JSON lacks 'title_ar' (when
        'title_en' is empty), 'id', 'user_id' or 'signature'.
        """
        video_id = self._match_id(url)

        # The metadata request carries an Origin header naming the site.
        request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            headers={'Origin': 'http://www.dcndigital.ae'})

        video = self._download_json(request, video_id)
        # Prefer the English title; the Arabic one is mandatory otherwise.
        title = video.get('title_en') or video['title_ar']

        # The embed page is addressed with values taken from the metadata.
        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
            + compat_urllib_parse.urlencode({
                'id': video['id'],
                'user_id': video['user_id'],
                'signature': video['signature'],
                'countries': 'Q0M=',
                'filter': 'DENY',
            }), video_id)

        m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        # An RTSP link is optional (fatal=False); add it when present.
        rtsp_url = self._search_regex(
            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        img = video.get('img')
        # 'img' is appended to the analytics base URL when present.
        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
        duration = int_or_none(video.get('duration'))
        description = video.get('description_en') or video.get('description_ar')
        # presumably ' ' is the date/time delimiter expected by
        # parse_iso8601 - confirm against its signature
        timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
|
@ -34,24 +34,14 @@ class DHMIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
playlist_url = self._search_regex(
|
playlist_url = self._search_regex(
|
||||||
r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
|
r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
|
||||||
|
|
||||||
playlist = self._download_xml(playlist_url, video_id)
|
entries = self._extract_xspf_playlist(playlist_url, playlist_id)
|
||||||
|
|
||||||
track = playlist.find(
|
|
||||||
'./{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
|
|
||||||
|
|
||||||
video_url = xpath_text(
|
|
||||||
track, './{http://xspf.org/ns/0/}location',
|
|
||||||
'video url', fatal=True)
|
|
||||||
thumbnail = xpath_text(
|
|
||||||
track, './{http://xspf.org/ns/0/}image',
|
|
||||||
'thumbnail')
|
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
[r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
|
[r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
|
||||||
@ -63,11 +53,10 @@ class DHMIE(InfoExtractor):
|
|||||||
r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
|
r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
|
||||||
webpage, 'duration', default=None))
|
webpage, 'duration', default=None))
|
||||||
|
|
||||||
return {
|
entries[0].update({
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
})
|
||||||
}
|
|
||||||
|
return self.playlist_result(entries, playlist_id)
|
||||||
|
@ -86,7 +86,7 @@ class FC2IE(InfoExtractor):
|
|||||||
|
|
||||||
info_url = (
|
info_url = (
|
||||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
|
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||||
|
|
||||||
info_webpage = self._download_webpage(
|
info_webpage = self._download_webpage(
|
||||||
info_url, video_id, note='Downloading info page')
|
info_url, video_id, note='Downloading info page')
|
||||||
|
@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,6 +130,89 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
|
||||||
|
{
|
||||||
|
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'smil',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Automatics, robotics and biocybernetics',
|
||||||
|
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
|
||||||
|
'formats': 'mincount:16',
|
||||||
|
'subtitles': 'mincount:1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'force_generic_extractor': True,
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
|
||||||
|
{
|
||||||
|
'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hds',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'hds',
|
||||||
|
'formats': 'mincount:1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# SMIL from https://www.restudy.dk/video/play/id/1637
|
||||||
|
{
|
||||||
|
'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'video_1637',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'video_1637',
|
||||||
|
'formats': 'mincount:3',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
|
||||||
|
{
|
||||||
|
'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'smil-service',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'smil-service',
|
||||||
|
'formats': 'mincount:1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
|
||||||
|
{
|
||||||
|
'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4719370',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
|
||||||
|
'formats': 'mincount:3',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
|
||||||
|
{
|
||||||
|
'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mZlp2ctYIUEB',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tikibad ontruimd wegens brand',
|
||||||
|
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 33,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
@ -236,6 +319,19 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Ooyala'],
|
'add_ie': ['Ooyala'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# ooyala video embedded with http://player.ooyala.com/iframe.js
|
||||||
|
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Steve Jobs: Man in the Machine" trailer',
|
||||||
|
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# multiple ooyala embeds on SBN network websites
|
# multiple ooyala embeds on SBN network websites
|
||||||
{
|
{
|
||||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||||
@ -1110,11 +1206,15 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
# Is it an RSS feed?
|
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||||
try:
|
try:
|
||||||
doc = parse_xml(webpage)
|
doc = parse_xml(webpage)
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
return self._extract_rss(url, video_id, doc)
|
return self._extract_rss(url, video_id, doc)
|
||||||
|
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||||
|
return self._parse_smil(doc, url, video_id)
|
||||||
|
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||||
|
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||||
except compat_xml_parse_error:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -1320,7 +1420,7 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
||||||
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||||
@ -1716,7 +1816,8 @@ class GenericIE(InfoExtractor):
|
|||||||
# here's a fun little line of code for you:
|
# here's a fun little line of code for you:
|
||||||
video_id = os.path.splitext(video_id)[0]
|
video_id = os.path.splitext(video_id)[0]
|
||||||
|
|
||||||
if determine_ext(video_url) == 'smil':
|
ext = determine_ext(video_url)
|
||||||
|
if ext == 'smil':
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': self._extract_smil_formats(video_url, video_id),
|
'formats': self._extract_smil_formats(video_url, video_id),
|
||||||
@ -1724,6 +1825,8 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': video_title,
|
'title': video_title,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
})
|
})
|
||||||
|
elif ext == 'xspf':
|
||||||
|
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||||
else:
|
else:
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor):
|
|||||||
return raw_data
|
return raw_data
|
||||||
|
|
||||||
def get_enc_key(self, swf_url, video_id):
|
def get_enc_key(self, swf_url, video_id):
|
||||||
enc_key = '8e29ab5666d041c3a1ea76e06dabdffb'
|
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
|
||||||
return enc_key
|
return enc_key
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NowTVIE(InfoExtractor):
|
class NowTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# rtl
|
# rtl
|
||||||
@ -127,6 +127,9 @@ class NowTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview',
|
'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -407,6 +407,7 @@ class NPORadioFragmentIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class VPROIE(NPOIE):
|
class VPROIE(NPOIE):
|
||||||
|
IE_NAME = 'vpro'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
@ -16,15 +16,17 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# metadata in JSON
|
# metadata in JSON
|
||||||
'url': 'http://ok.ru/video/20079905452',
|
'url': 'http://ok.ru/video/20079905452',
|
||||||
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
|
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '20079905452',
|
'id': '20079905452',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Культура меняет нас (прекрасный ролик!))',
|
'title': 'Культура меняет нас (прекрасный ролик!))',
|
||||||
'duration': 100,
|
'duration': 100,
|
||||||
|
'upload_date': '20141207',
|
||||||
'uploader_id': '330537914540',
|
'uploader_id': '330537914540',
|
||||||
'uploader': 'Виталий Добровольский',
|
'uploader': 'Виталий Добровольский',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# metadataUrl
|
# metadataUrl
|
||||||
@ -35,9 +37,11 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Девушка без комплексов ...',
|
'title': 'Девушка без комплексов ...',
|
||||||
'duration': 191,
|
'duration': 191,
|
||||||
|
'upload_date': '20150518',
|
||||||
'uploader_id': '534380003155',
|
'uploader_id': '534380003155',
|
||||||
'uploader': 'Андрей Мещанинов',
|
'uploader': '☭ Андрей Мещанинов ☭',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||||
|
@ -92,6 +92,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'duration': 3172,
|
'duration': 3172,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'upload_date': '20140122',
|
'upload_date': '20140122',
|
||||||
|
'age_limit': 10,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
|
99
youtube_dl/extractor/periscope.py
Normal file
99
youtube_dl/extractor/periscope.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
from ..utils import parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
|
class PeriscopeIE(InfoExtractor):
    """Information extractor for periscope.tv broadcasts.

    The token found in the URL is passed to two public API methods:
    'getBroadcastPublic' for the metadata and 'getAccessPublic' for the
    stream URLs.
    """

    IE_DESC = 'Periscope'
    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
        'md5': '65b57957972e503fcbbaeed8f4fa04ca',
        'info_dict': {
            'id': '56102209',
            'ext': 'mp4',
            'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
            'timestamp': 1438978559,
            'upload_date': '20150807',
            'uploader': 'Bec Boop',
            'uploader_id': '1465763',
        },
        'skip': 'Expires in 24 hours',
    }

    def _call_api(self, method, token):
        """Download and return the JSON answer of API *method* for *token*."""
        return self._download_json(
            'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token)

    def _real_extract(self, url):
        """Return the info dict for the broadcast addressed by *url*.

        Raises KeyError when the API answer lacks 'broadcast' or the
        broadcast lacks 'status'.
        """
        token = self._match_id(url)

        broadcast_data = self._call_api('getBroadcastPublic', token)
        broadcast = broadcast_data['broadcast']
        status = broadcast['status']

        # Uploader details may live on the broadcast or on a sibling 'user'.
        uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
        uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')

        title = '%s - %s' % (uploader, status) if uploader else status
        # 'state' is read as an optional field, so tolerate a missing or null
        # value instead of failing with AttributeError on None.lower().
        state = (broadcast.get('state') or '').lower()
        if state == 'running':
            title = self._live_title(title)
        timestamp = parse_iso8601(broadcast.get('created_at'))

        thumbnails = [{
            'url': broadcast[image],
        } for image in ('image_url', 'image_url_small') if broadcast.get(image)]

        stream = self._call_api('getAccessPublic', token)

        formats = []
        for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
            video_url = stream.get(format_id + '_url')
            if not video_url:
                continue
            f = {
                'url': video_url,
                'ext': 'flv' if format_id == 'rtmp' else 'mp4',
            }
            if format_id != 'rtmp':
                # The native HLS downloader is selected only for ended broadcasts.
                f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8'
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': broadcast.get('id') or token,
            'title': title,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class QuickscopeIE(InfoExtractor):
    """Resolve watchonperiscope.com broadcast pages to the 'Periscope' extractor."""

    IE_DESC = 'Quick Scope'
    _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
    _TEST = {
        'url': 'https://watchonperiscope.com/broadcast/56180087',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """POST the broadcast id to accessChannel and delegate to its 'share_url'."""
        broadcast_id = self._match_id(url)

        form_fields = {
            'broadcast_id': broadcast_id,
            'entry_ticket': '',
            'from_push': 'false',
            'uses_sessions': 'true',
        }
        # The request body is the urlencoded form, encoded as UTF-8 bytes.
        post_body = compat_urllib_parse.urlencode(form_fields).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://watchonperiscope.com/api/accessChannel', post_body)

        channel = self._download_json(request, broadcast_id)
        return self.url_result(channel['share_url'], 'Periscope')
|
@ -22,6 +22,7 @@ class Porn91IE(InfoExtractor):
|
|||||||
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 431,
|
'duration': 431,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,4 +69,5 @@ class Porn91IE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
|
'age_limit': self._rta_search(webpage),
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@ class RutubeIE(InfoExtractor):
|
|||||||
'uploader': 'NTDRussian',
|
'uploader': 'NTDRussian',
|
||||||
'uploader_id': '29790',
|
'uploader_id': '29790',
|
||||||
'upload_date': '20131016',
|
'upload_date': '20131016',
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# It requires ffmpeg (m3u8 download)
|
# It requires ffmpeg (m3u8 download)
|
||||||
|
@ -29,6 +29,7 @@ class SexyKarmaIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
||||||
|
@ -45,6 +45,14 @@ class SouthParkDeIE(SouthParkIE):
|
|||||||
'title': 'The Government Won\'t Respect My Privacy',
|
'title': 'The Government Won\'t Respect My Privacy',
|
||||||
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# non-ASCII characters in initial URL
|
||||||
|
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
|
||||||
|
'playlist_count': 4,
|
||||||
|
}, {
|
||||||
|
# non-ASCII characters in redirect URL
|
||||||
|
'url': 'http://www.southpark.de/alle-episoden/s18e09',
|
||||||
|
'playlist_count': 4,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
35
youtube_dl/extractor/telegraaf.py
Normal file
35
youtube_dl/extractor/telegraaf.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import remove_end
|
||||||
|
|
||||||
|
|
||||||
|
class TelegraafIE(InfoExtractor):
    """Information extractor for telegraaf.nl TV pages.

    The page references an XSPF playlist through an `iframe.loadPlayer(...)`
    call; its entries are returned as a playlist titled from the page's
    Open Graph metadata.
    """

    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
    _TEST = {
        'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
        'info_dict': {
            'id': '24353229',
            'ext': 'mp4',
            'title': 'Tikibad ontruimd wegens brand',
            'description': 'md5:05ca046ff47b931f9b04855015e163a4',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 33,
        },
    }

    def _real_extract(self, url):
        """Return a playlist result built from the XSPF playlist of *url*."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        playlist_url = self._search_regex(
            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')

        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
        # A trailing ' - VIDEO' is stripped from the Open Graph title.
        title = remove_end(self._og_search_title(webpage), ' - VIDEO')
        description = self._og_search_description(webpage)

        return self.playlist_result(entries, playlist_id, title, description)
|
@ -104,6 +104,7 @@ class TVPlayIE(InfoExtractor):
|
|||||||
'duration': 1492,
|
'duration': 1492,
|
||||||
'timestamp': 1330522854,
|
'timestamp': 1330522854,
|
||||||
'upload_date': '20120229',
|
'upload_date': '20120229',
|
||||||
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
|
@ -13,7 +13,7 @@ class TweakersIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
||||||
'md5': '1b5afa817403bb5baa08359dca31e6df',
|
'md5': '3147e4ddad366f97476a93863e4557c8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9926',
|
'id': '9926',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -25,41 +25,7 @@ class TweakersIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
entries = self._extract_xspf_playlist(
|
||||||
playlist = self._download_xml(
|
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id)
|
||||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
|
return self.playlist_result(entries, playlist_id)
|
||||||
video_id)
|
|
||||||
|
|
||||||
NS_MAP = {
|
|
||||||
'xspf': 'http://xspf.org/ns/0/',
|
|
||||||
's1': 'http://static.streamone.nl/player/ns/0',
|
|
||||||
}
|
|
||||||
|
|
||||||
track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
|
|
||||||
|
|
||||||
title = xpath_text(
|
|
||||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
|
|
||||||
description = xpath_text(
|
|
||||||
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
|
|
||||||
thumbnail = xpath_text(
|
|
||||||
track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
|
|
||||||
duration = float_or_none(
|
|
||||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
|
|
||||||
1000)
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': location.text,
|
|
||||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
|
||||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
|
||||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
|
||||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class VideoLecturesNetIE(InfoExtractor):
|
class VideoLecturesNetIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
|
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||||
IE_NAME = 'videolectures.net'
|
IE_NAME = 'videolectures.net'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
@ -29,6 +29,7 @@ from ..utils import (
|
|||||||
class VimeoBaseInfoExtractor(InfoExtractor):
|
class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'vimeo'
|
_NETRC_MACHINE = 'vimeo'
|
||||||
_LOGIN_REQUIRED = False
|
_LOGIN_REQUIRED = False
|
||||||
|
_LOGIN_URL = 'https://vimeo.com/log_in'
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
@ -37,21 +38,25 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
return
|
return
|
||||||
self.report_login()
|
self.report_login()
|
||||||
login_url = 'https://vimeo.com/log_in'
|
webpage = self._download_webpage(self._LOGIN_URL, None, False)
|
||||||
webpage = self._download_webpage(login_url, None, False)
|
token = self._extract_xsrft(webpage)
|
||||||
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
|
|
||||||
data = urlencode_postdata({
|
data = urlencode_postdata({
|
||||||
|
'action': 'login',
|
||||||
'email': username,
|
'email': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
'action': 'login',
|
|
||||||
'service': 'vimeo',
|
'service': 'vimeo',
|
||||||
'token': token,
|
'token': token,
|
||||||
})
|
})
|
||||||
login_request = compat_urllib_request.Request(login_url, data)
|
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
|
||||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
login_request.add_header('Cookie', 'xsrft=%s' % token)
|
login_request.add_header('Referer', self._LOGIN_URL)
|
||||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||||
|
|
||||||
|
def _extract_xsrft(self, webpage):
    """Return the xsrft token found in *webpage*.

    Matches `xsrft` followed by `=` or `:` and a value enclosed in a
    matching pair of single or double quotes.
    """
    xsrft_re = r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)'
    return self._search_regex(xsrft_re, webpage, 'login token', group='xsrft')
|
||||||
|
|
||||||
|
|
||||||
class VimeoIE(VimeoBaseInfoExtractor):
|
class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
"""Information extractor for vimeo.com."""
|
"""Information extractor for vimeo.com."""
|
||||||
@ -193,7 +198,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
if password is None:
|
if password is None:
|
||||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
token = self._extract_xsrft(webpage)
|
||||||
data = urlencode_postdata({
|
data = urlencode_postdata({
|
||||||
'password': password,
|
'password': password,
|
||||||
'token': token,
|
'token': token,
|
||||||
@ -203,7 +208,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
url = url.replace('http://', 'https://')
|
url = url.replace('http://', 'https://')
|
||||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
password_request.add_header('Referer', url)
|
||||||
return self._download_webpage(
|
return self._download_webpage(
|
||||||
password_request, video_id,
|
password_request, video_id,
|
||||||
'Verifying the password', 'Wrong password')
|
'Verifying the password', 'Wrong password')
|
||||||
@ -422,10 +427,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class VimeoChannelIE(InfoExtractor):
|
class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||||
IE_NAME = 'vimeo:channel'
|
IE_NAME = 'vimeo:channel'
|
||||||
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||||
|
_TITLE = None
|
||||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vimeo.com/channels/tributes',
|
'url': 'https://vimeo.com/channels/tributes',
|
||||||
@ -440,7 +446,7 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
return '%s/videos/page:%d/' % (base_url, pagenum)
|
return '%s/videos/page:%d/' % (base_url, pagenum)
|
||||||
|
|
||||||
def _extract_list_title(self, webpage):
|
def _extract_list_title(self, webpage):
|
||||||
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||||
|
|
||||||
def _login_list_password(self, page_url, list_id, webpage):
|
def _login_list_password(self, page_url, list_id, webpage):
|
||||||
login_form = self._search_regex(
|
login_form = self._search_regex(
|
||||||
@ -453,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
if password is None:
|
if password is None:
|
||||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||||
fields = self._hidden_inputs(login_form)
|
fields = self._hidden_inputs(login_form)
|
||||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
token = self._extract_xsrft(webpage)
|
||||||
fields['token'] = token
|
fields['token'] = token
|
||||||
fields['password'] = password
|
fields['password'] = password
|
||||||
post = urlencode_postdata(fields)
|
post = urlencode_postdata(fields)
|
||||||
@ -499,7 +505,7 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
class VimeoUserIE(VimeoChannelIE):
|
class VimeoUserIE(VimeoChannelIE):
|
||||||
IE_NAME = 'vimeo:user'
|
IE_NAME = 'vimeo:user'
|
||||||
_VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||||
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
|
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vimeo.com/nkistudio/videos',
|
'url': 'https://vimeo.com/nkistudio/videos',
|
||||||
@ -603,14 +609,14 @@ class VimeoReviewIE(InfoExtractor):
|
|||||||
return self.url_result(player_url, 'Vimeo', video_id)
|
return self.url_result(player_url, 'Vimeo', video_id)
|
||||||
|
|
||||||
|
|
||||||
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
class VimeoWatchLaterIE(VimeoChannelIE):
|
||||||
IE_NAME = 'vimeo:watchlater'
|
IE_NAME = 'vimeo:watchlater'
|
||||||
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
|
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
|
||||||
_VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
|
_VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
|
||||||
|
_TITLE = 'Watch Later'
|
||||||
_LOGIN_REQUIRED = True
|
_LOGIN_REQUIRED = True
|
||||||
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vimeo.com/home/watchlater',
|
'url': 'https://vimeo.com/watchlater',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -626,7 +632,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
|||||||
return request
|
return request
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
|
||||||
|
|
||||||
|
|
||||||
class VimeoLikesIE(InfoExtractor):
|
class VimeoLikesIE(InfoExtractor):
|
||||||
|
@ -213,7 +213,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|(?: # or the v= param in all its forms
|
|(?: # or the v= param in all its forms
|
||||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
(?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||||
v=
|
v=
|
||||||
)
|
)
|
||||||
))
|
))
|
||||||
@ -365,6 +365,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'description': 'md5:64249768eec3bc4276236606ea996373',
|
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||||
'uploader': 'justintimberlakeVEVO',
|
'uploader': 'justintimberlakeVEVO',
|
||||||
'uploader_id': 'justintimberlakeVEVO',
|
'uploader_id': 'justintimberlakeVEVO',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -380,6 +381,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_id': 'setindia'
|
'uploader_id': 'setindia'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
|
||||||
|
'note': 'Use the first video ID in the URL',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BaW_jenozKc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
|
||||||
|
'uploader': 'Philipp Hagemeister',
|
||||||
|
'uploader_id': 'phihag',
|
||||||
|
'upload_date': '20121002',
|
||||||
|
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||||
|
'categories': ['Science & Technology'],
|
||||||
|
'tags': ['youtube-dl'],
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
||||||
'note': '256k DASH audio (format 141) via DASH manifest',
|
'note': '256k DASH audio (format 141) via DASH manifest',
|
||||||
@ -421,7 +442,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'nfWlot6h_JM',
|
'id': 'nfWlot6h_JM',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Taylor Swift - Shake It Off',
|
'title': 'Taylor Swift - Shake It Off',
|
||||||
'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
|
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
|
||||||
'uploader': 'TaylorSwiftVEVO',
|
'uploader': 'TaylorSwiftVEVO',
|
||||||
'uploader_id': 'TaylorSwiftVEVO',
|
'uploader_id': 'TaylorSwiftVEVO',
|
||||||
'upload_date': '20140818',
|
'upload_date': '20140818',
|
||||||
@ -455,6 +476,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader': 'The Witcher',
|
'uploader': 'The Witcher',
|
||||||
'uploader_id': 'WitcherGame',
|
'uploader_id': 'WitcherGame',
|
||||||
'upload_date': '20140605',
|
'upload_date': '20140605',
|
||||||
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# Age-gate video with encrypted signature
|
# Age-gate video with encrypted signature
|
||||||
@ -468,6 +490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader': 'LloydVEVO',
|
'uploader': 'LloydVEVO',
|
||||||
'uploader_id': 'LloydVEVO',
|
'uploader_id': 'LloydVEVO',
|
||||||
'upload_date': '20110629',
|
'upload_date': '20110629',
|
||||||
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
||||||
@ -492,7 +515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'lqQg6PlCWgI',
|
'id': 'lqQg6PlCWgI',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20120731',
|
'upload_date': '20120724',
|
||||||
'uploader_id': 'olympic',
|
'uploader_id': 'olympic',
|
||||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||||
'uploader': 'Olympics',
|
'uploader': 'Olympics',
|
||||||
@ -521,7 +544,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'qEJwOuvDf7I',
|
'url': 'qEJwOuvDf7I',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'qEJwOuvDf7I',
|
'id': 'qEJwOuvDf7I',
|
||||||
'ext': 'mp4',
|
'ext': 'webm',
|
||||||
'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
|
'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
|
||||||
'description': '',
|
'description': '',
|
||||||
'upload_date': '20150404',
|
'upload_date': '20150404',
|
||||||
|
@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
def http_request(self, req):
|
def http_request(self, req):
|
||||||
|
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||||
|
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||||
|
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||||
|
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||||
|
# To work around aforementioned issue we will replace request's original URL with
|
||||||
|
# percent-encoded one
|
||||||
|
# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
|
||||||
|
# the code of this workaround has been moved here from YoutubeDL.urlopen()
|
||||||
|
url = req.get_full_url()
|
||||||
|
url_escaped = escape_url(url)
|
||||||
|
|
||||||
|
# Substitute URL if any change after escaping
|
||||||
|
if url != url_escaped:
|
||||||
|
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||||
|
new_req = req_type(
|
||||||
|
url_escaped, data=req.data, headers=req.headers,
|
||||||
|
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||||
|
new_req.timeout = req.timeout
|
||||||
|
req = new_req
|
||||||
|
|
||||||
for h, v in std_headers.items():
|
for h, v in std_headers.items():
|
||||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
||||||
# The dict keys are capitalized because of this bug by urllib
|
# The dict keys are capitalized because of this bug by urllib
|
||||||
@ -695,6 +715,17 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
gz = io.BytesIO(self.deflate(resp.read()))
|
gz = io.BytesIO(self.deflate(resp.read()))
|
||||||
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
|
||||||
resp.msg = old_resp.msg
|
resp.msg = old_resp.msg
|
||||||
|
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
|
||||||
|
if 300 <= resp.code < 400:
|
||||||
|
location = resp.headers.get('Location')
|
||||||
|
if location:
|
||||||
|
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
|
||||||
|
if sys.version_info >= (3, 0):
|
||||||
|
location = location.encode('iso-8859-1').decode('utf-8')
|
||||||
|
location_escaped = escape_url(location)
|
||||||
|
if location != location_escaped:
|
||||||
|
del resp.headers['Location']
|
||||||
|
resp.headers['Location'] = location_escaped
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
https_request = http_request
|
https_request = http_request
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.07.28'
|
__version__ = '2015.08.09'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user