pep: function name refactorings
commit 1c33e3bee2
parent fad4ceb534
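
Rename camelCase helpers to snake_case per PEP 8: make_HTTPS_handler -> make_https_handler, encodeFilename -> encode_filename, encodeArgument -> encode_argument, decodeArgument -> decode_argument, decodeOption -> decode_option, formatSeconds -> format_seconds, orderedSet -> ordered_set, unescapeHTML -> unescape_html. Call sites in the release script, tests, core, downloaders, and extractors are updated to match.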
@@ -21,7 +21,7 @@ from youtube_dl.compat import (
     compat_urllib_request,
 )
 from youtube_dl.utils import (
-    make_HTTPS_handler,
+    make_https_handler,
     sanitized_Request,
 )
 
@@ -33,7 +33,7 @@ class GitHubReleaser(object):
 
     def __init__(self, debuglevel=0):
         self._init_github_account()
-        https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
+        https_handler = make_https_handler({}, debuglevel=debuglevel)
         self._opener = compat_urllib_request.build_opener(https_handler)
 
     def _init_github_account(self):
@@ -13,7 +13,7 @@ from test.helper import http_server_port, try_rm
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
-from youtube_dl.utils import encodeFilename
+from youtube_dl.utils import encode_filename
 import threading
 
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -91,12 +91,12 @@ class TestHttpFD(unittest.TestCase):
         ydl = YoutubeDL(params)
         downloader = HttpFD(ydl, params)
         filename = 'testfile.mp4'
-        try_rm(encodeFilename(filename))
+        try_rm(encode_filename(filename))
         self.assertTrue(downloader.real_download(filename, {
             'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
         }))
-        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
-        try_rm(encodeFilename(filename))
+        self.assertEqual(os.path.getsize(encode_filename(filename)), TEST_SIZE)
+        try_rm(encode_filename(filename))
 
     def download_all(self, params):
         for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
@@ -10,7 +10,7 @@ import os
 import subprocess
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.utils import encodeArgument
+from youtube_dl.utils import encode_argument
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -34,7 +34,7 @@ class TestExecution(unittest.TestCase):
 
     def test_cmdline_umlauts(self):
         p = subprocess.Popen(
-            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            [sys.executable, 'youtube_dl/__main__.py', encode_argument('ä'), '--version'],
             cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         self.assertFalse(stderr)
@@ -26,7 +26,7 @@ from youtube_dl.utils import (
     determine_ext,
     dict_get,
     encode_compat_str,
-    encodeFilename,
+    encode_filename,
     escape_rfc3986,
     escape_url,
     extract_attributes,
@@ -48,7 +48,7 @@ from youtube_dl.utils import (
     multipart_encode,
     ohdave_rsa_encrypt,
     OnDemandPagedList,
-    orderedSet,
+    ordered_set,
     parse_age_limit,
     parse_duration,
     parse_filesize,
@@ -71,7 +71,7 @@ from youtube_dl.utils import (
     str_to_int,
     strip_jsonp,
     timeconvert,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unified_timestamp,
     unsmuggle_url,
@@ -277,21 +277,21 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(remove_quotes('";"'), ';')
 
     def test_ordered_set(self):
-        self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
-        self.assertEqual(orderedSet([]), [])
-        self.assertEqual(orderedSet([1]), [1])
+        self.assertEqual(ordered_set([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
+        self.assertEqual(ordered_set([]), [])
+        self.assertEqual(ordered_set([1]), [1])
         # keep the list ordered
-        self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
+        self.assertEqual(ordered_set([135, 1, 1, 1]), [135, 1])
 
     def test_unescape_html(self):
-        self.assertEqual(unescapeHTML('%20;'), '%20;')
-        self.assertEqual(unescapeHTML('&#x2F;'), '/')
-        self.assertEqual(unescapeHTML('&#47;'), '/')
-        self.assertEqual(unescapeHTML('&eacute;'), 'é')
-        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
-        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
+        self.assertEqual(unescape_html('%20;'), '%20;')
+        self.assertEqual(unescape_html('&#x2F;'), '/')
+        self.assertEqual(unescape_html('&#47;'), '/')
+        self.assertEqual(unescape_html('&eacute;'), 'é')
+        self.assertEqual(unescape_html('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescape_html('&a&quot;'), '&a"')
         # HTML5 entities
-        self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
+        self.assertEqual(unescape_html('&period;&apos;'), '.\'')
 
     def test_date_from_str(self):
         self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
@@ -462,7 +462,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
 
     def test_shell_quote(self):
-        args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+        args = ['ffmpeg', '-i', encode_filename('ñ€ß\'.mp4')]
         self.assertEqual(
             shell_quote(args),
             """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
@@ -53,19 +53,19 @@ from .utils import (
     determine_protocol,
     DownloadError,
     encode_compat_str,
-    encodeFilename,
+    encode_filename,
     error_to_compat_str,
     expand_path,
     ExtractorError,
     format_bytes,
-    formatSeconds,
+    format_seconds,
     GeoRestrictedError,
     int_or_none,
     ISO3166Utils,
     locked_file,
-    make_HTTPS_handler,
+    make_https_handler,
     MaxDownloadsReached,
-    orderedSet,
+    ordered_set,
     PagedList,
     parse_filesize,
     PerRequestProxyHandler,
@@ -710,7 +710,7 @@ class YoutubeDL(object):
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
             if sys.version_info < (3, 0) and sys.platform == 'win32':
-                filename = encodeFilename(filename, True).decode(preferredencoding())
+                filename = encode_filename(filename, True).decode(preferredencoding())
             return sanitize_path(filename)
         except ValueError as err:
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
@@ -918,7 +918,7 @@ class YoutubeDL(object):
                             yield int(item)
                     else:
                         yield int(string_segment)
-            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+            playlistitems = ordered_set(iter_playlistitems(playlistitems_str))
 
         ie_entries = ie_result['entries']
 
@@ -1735,7 +1735,7 @@ class YoutubeDL(object):
         if self.params.get('forcefilename', False) and filename is not None:
             self.to_stdout(filename)
         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
-            self.to_stdout(formatSeconds(info_dict['duration']))
+            self.to_stdout(format_seconds(info_dict['duration']))
         if self.params.get('forceformat', False):
             self.to_stdout(info_dict['format'])
         if self.params.get('forcejson', False):
@@ -1758,19 +1758,19 @@ class YoutubeDL(object):
                 self.report_error('unable to create directory ' + error_to_compat_str(err))
                 return False
 
-        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+        if not ensure_dir_exists(sanitize_path(encode_filename(filename))):
             return
 
         if self.params.get('writedescription', False):
             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(descfn)):
                 self.to_screen('[info] Video description is already present')
             elif info_dict.get('description') is None:
                 self.report_warning('There\'s no description to write.')
             else:
                 try:
                     self.to_screen('[info] Writing video description to: ' + descfn)
-                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                    with io.open(encode_filename(descfn), 'w', encoding='utf-8') as descfile:
                         descfile.write(info_dict['description'])
                 except (OSError, IOError):
                     self.report_error('Cannot write description file ' + descfn)
@@ -1778,12 +1778,12 @@ class YoutubeDL(object):
 
         if self.params.get('writeannotations', False):
             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(annofn)):
                 self.to_screen('[info] Video annotations are already present')
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
-                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    with io.open(encode_filename(annofn), 'w', encoding='utf-8') as annofile:
                         annofile.write(info_dict['annotations'])
                 except (KeyError, TypeError):
                     self.report_warning('There are no annotations to write.')
@@ -1802,7 +1802,7 @@ class YoutubeDL(object):
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
@@ -1810,7 +1810,7 @@ class YoutubeDL(object):
                         try:
                             # Use newline='' to prevent conversion of newline characters
                             # See https://github.com/rg3/youtube-dl/issues/10268
-                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                            with io.open(encode_filename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                 subfile.write(sub_info['data'])
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
@@ -1819,7 +1819,7 @@ class YoutubeDL(object):
                         try:
                             sub_data = ie._request_webpage(
                                 sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                            with io.open(encode_filename(sub_filename), 'wb') as subfile:
                                 subfile.write(sub_data)
                         except (ExtractorError, IOError, OSError, ValueError) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
@@ -1828,7 +1828,7 @@ class YoutubeDL(object):
 
         if self.params.get('writeinfojson', False):
             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(infofn)):
                 self.to_screen('[info] Video description metadata is already present')
             else:
                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
@@ -1889,7 +1889,7 @@ class YoutubeDL(object):
                             'Requested formats are incompatible for merge and will be merged into mkv.')
                     # Ensure filename always has a correct extension for successful merge
                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
-                    if os.path.exists(encodeFilename(filename)):
+                    if os.path.exists(encode_filename(filename)):
                         self.to_screen(
                             '[download] %s has already been downloaded and '
                             'merged' % filename)
@@ -2055,7 +2055,7 @@ class YoutubeDL(object):
         for old_filename in files_to_delete:
             self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
             try:
-                os.remove(encodeFilename(old_filename))
+                os.remove(encode_filename(old_filename))
             except (IOError, OSError):
                 self.report_warning('Unable to remove downloaded original file')
 
@@ -2319,7 +2319,7 @@ class YoutubeDL(object):
         proxy_handler = PerRequestProxyHandler(proxies)
 
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
-        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+        https_handler = make_https_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
         data_handler = compat_urllib_request_DataHandler()
 
@@ -2378,7 +2378,7 @@ class YoutubeDL(object):
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
 
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(thumb_filename)):
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
             else:
@@ -2386,7 +2386,7 @@ class YoutubeDL(object):
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
                 try:
                     uf = self.urlopen(t['url'])
-                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
+                    with open(encode_filename(thumb_filename), 'wb') as thumbf:
                         shutil.copyfileobj(uf, thumbf)
                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
@@ -22,7 +22,7 @@ from .compat import (
 )
 from .utils import (
     DateRange,
-    decodeOption,
+    decode_option,
     DEFAULT_OUTTMPL,
     DownloadError,
     expand_path,
@@ -375,8 +375,8 @@ def _real_main(argv=None):
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
         'subtitleslangs': opts.subtitleslangs,
-        'matchtitle': decodeOption(opts.matchtitle),
-        'rejecttitle': decodeOption(opts.rejecttitle),
+        'matchtitle': decode_option(opts.matchtitle),
+        'rejecttitle': decode_option(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
         'verbose': opts.verbose,
@@ -8,8 +8,8 @@ import random
 
 from ..compat import compat_os_name
 from ..utils import (
-    decodeArgument,
-    encodeFilename,
+    decode_argument,
+    encode_filename,
     error_to_compat_str,
     format_bytes,
     shell_quote,
@@ -181,7 +181,7 @@ class FileDownloader(object):
     def temp_name(self, filename):
         """Returns a temporary filename for the given filename."""
         if self.params.get('nopart', False) or filename == '-' or \
-                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+                (os.path.exists(encode_filename(filename)) and not os.path.isfile(encode_filename(filename))):
             return filename
         return filename + '.part'
 
@@ -197,7 +197,7 @@ class FileDownloader(object):
         try:
             if old_filename == new_filename:
                 return
-            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+            os.rename(encode_filename(old_filename), encode_filename(new_filename))
         except (IOError, OSError) as err:
             self.report_error('unable to rename file: %s' % error_to_compat_str(err))
 
@@ -205,7 +205,7 @@ class FileDownloader(object):
         """Try to set the last-modified time of the given file."""
         if last_modified_hdr is None:
             return
-        if not os.path.isfile(encodeFilename(filename)):
+        if not os.path.isfile(encode_filename(filename)):
             return
         timestr = last_modified_hdr
         if timestr is None:
@@ -331,13 +331,13 @@ class FileDownloader(object):
 
         nooverwrites_and_exists = (
             self.params.get('nooverwrites', False) and
-            os.path.exists(encodeFilename(filename))
+            os.path.exists(encode_filename(filename))
         )
 
         if not hasattr(filename, 'write'):
             continuedl_and_exists = (
                 self.params.get('continuedl', True) and
-                os.path.isfile(encodeFilename(filename)) and
+                os.path.isfile(encode_filename(filename)) and
                 not self.params.get('nopart', False)
             )
 
@@ -347,7 +347,7 @@ class FileDownloader(object):
                 self._hook_progress({
                     'filename': filename,
                     'status': 'finished',
-                    'total_bytes': os.path.getsize(encodeFilename(filename)),
+                    'total_bytes': os.path.getsize(encode_filename(filename)),
                 })
                 return True
 
@@ -380,7 +380,7 @@ class FileDownloader(object):
         if not self.params.get('verbose', False):
            return
 
-        str_args = [decodeArgument(a) for a in args]
+        str_args = [decode_argument(a) for a in args]
 
         if exe is None:
             exe = os.path.basename(str_args[0])
@@ -17,8 +17,8 @@ from ..utils import (
     cli_valueless_option,
     cli_bool_option,
     cli_configuration_args,
-    encodeFilename,
-    encodeArgument,
+    encode_filename,
+    encode_argument,
     handle_youtubedl_headers,
     check_executable,
     is_outdated_version,
@@ -49,7 +49,7 @@ class ExternalFD(FileDownloader):
             'elapsed': time.time() - started,
         }
         if filename != '-':
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            fsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
             self.try_rename(tmpfilename, filename)
             status.update({
@@ -98,7 +98,7 @@ class ExternalFD(FileDownloader):
 
     def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
-        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+        cmd = [encode_argument(a) for a in self._make_cmd(tmpfilename, info_dict)]
 
         self._debug_cmd(cmd)
 
@@ -131,7 +131,7 @@ class CurlFD(ExternalFD):
         return cmd
 
     def _call_downloader(self, tmpfilename, info_dict):
-        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+        cmd = [encode_argument(a) for a in self._make_cmd(tmpfilename, info_dict)]
 
         self._debug_cmd(cmd)
 
@@ -311,8 +311,8 @@ class FFmpegFD(ExternalFD):
         else:
             args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
 
-        args = [encodeArgument(opt) for opt in args]
-        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+        args = [encode_argument(opt) for opt in args]
+        args.append(encode_filename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
 
         self._debug_cmd(args)
 
@@ -8,7 +8,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..utils import (
     error_to_compat_str,
-    encodeFilename,
+    encode_filename,
     sanitize_open,
     sanitized_Request,
 )
@@ -117,7 +117,7 @@ class FragmentFD(FileDownloader):
             if self.__do_ytdl_file(ctx):
                 self._write_ytdl_file(ctx)
             if not self.params.get('keep_fragments', False):
-                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+                os.remove(encode_filename(ctx['fragment_filename_sanitized']))
             del ctx['fragment_filename_sanitized']
 
     def _prepare_frag_download(self, ctx):
@@ -150,9 +150,9 @@ class FragmentFD(FileDownloader):
         resume_len = 0
 
         # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        if os.path.isfile(encode_filename(tmpfilename)):
             open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
+            resume_len = os.path.getsize(encode_filename(tmpfilename))
 
         # Should be initialized before ytdl file check
         ctx.update({
@@ -161,7 +161,7 @@ class FragmentFD(FileDownloader):
         })
 
         if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            if os.path.isfile(encode_filename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
                 is_corrupt = ctx.get('ytdl_corrupt') is True
                 is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -248,7 +248,7 @@ class FragmentFD(FileDownloader):
     def _finish_frag_download(self, ctx):
         ctx['dest_stream'].close()
         if self.__do_ytdl_file(ctx):
-            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+            ytdl_filename = encode_filename(self.ytdl_filename(ctx['filename']))
             if os.path.isfile(ytdl_filename):
                 os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
@@ -257,7 +257,7 @@ class FragmentFD(FileDownloader):
             downloaded_bytes = ctx['complete_frags_downloaded_bytes']
         else:
             self.try_rename(ctx['tmpfilename'], ctx['filename'])
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = os.path.getsize(encode_filename(ctx['filename']))
 
         self._hook_progress({
             'downloaded_bytes': downloaded_bytes,
@@ -14,7 +14,7 @@ from ..compat import (
 )
 from ..utils import (
     ContentTooShortError,
-    encodeFilename,
+    encode_filename,
     int_or_none,
     sanitize_open,
     sanitized_Request,
@@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
 
         if self.params.get('continuedl', True):
             # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+            if os.path.isfile(encode_filename(ctx.tmpfilename)):
                 ctx.resume_len = os.path.getsize(
-                    encodeFilename(ctx.tmpfilename))
+                    encode_filename(ctx.tmpfilename))
 
         ctx.is_resume = ctx.resume_len > 0
 
@@ -221,7 +221,7 @@ class HttpFD(FileDownloader):
                 if not to_stdout:
                     ctx.stream.close()
                 ctx.stream = None
-                ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+                ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encode_filename(ctx.tmpfilename))
                 raise RetryDownload(e)
 
             while True:
@@ -9,8 +9,8 @@ from .common import FileDownloader
 from ..compat import compat_str
 from ..utils import (
     check_executable,
-    encodeFilename,
-    encodeArgument,
+    encode_filename,
+    encode_argument,
     get_exe_version,
 )
 
@@ -156,7 +156,7 @@ class RtmpFD(FileDownloader):
         if not live and continue_dl:
             args += ['--skip', '1']
 
-        args = [encodeArgument(a) for a in args]
+        args = [encode_argument(a) for a in args]
 
         self._debug_cmd(args, exe='rtmpdump')
 
@@ -180,15 +180,15 @@ class RtmpFD(FileDownloader):
             return False
 
         while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
-            prevsize = os.path.getsize(encodeFilename(tmpfilename))
+            prevsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
             time.sleep(5.0)  # This seems to be needed
             args = basic_args + ['--resume']
             if retval == RD_FAILED:
                 args += ['--skip', '1']
-            args = [encodeArgument(a) for a in args]
+            args = [encode_argument(a) for a in args]
             retval = run_rtmpdump(args)
-            cursize = os.path.getsize(encodeFilename(tmpfilename))
+            cursize = os.path.getsize(encode_filename(tmpfilename))
             if prevsize == cursize and retval == RD_FAILED:
                 break
             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@@ -197,7 +197,7 @@ class RtmpFD(FileDownloader):
                 retval = RD_SUCCESS
                 break
         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            fsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
@@ -6,7 +6,7 @@ import subprocess
 from .common import FileDownloader
 from ..utils import (
     check_executable,
-    encodeFilename,
+    encode_filename,
 )
 
 
@@ -31,7 +31,7 @@ class RtspFD(FileDownloader):
 
         retval = subprocess.call(args)
         if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            fsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
@@ -13,7 +13,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
 )
 
@@ -131,7 +131,7 @@ class ABCIViewIE(InfoExtractor):
         video_id = self._match_id(url)
         video_params = self._download_json(
             'https://iview.abc.net.au/api/programs/' + video_id, video_id)
-        title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+        title = unescape_html(video_params.get('title') or video_params['seriesTitle'])
         stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
 
         house_number = video_params.get('episodeHouseNumber') or video_id
@@ -179,7 +179,7 @@ class ABCIViewIE(InfoExtractor):
             'thumbnail': video_params.get('thumbnail'),
             'duration': int_or_none(video_params.get('eventDuration')),
             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
-            'series': unescapeHTML(video_params.get('seriesTitle')),
+            'series': unescape_html(video_params.get('seriesTitle')),
             'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
             'season_number': int_or_none(self._search_regex(
                 r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
@@ -11,7 +11,7 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
-    unescapeHTML,
+    unescape_html,
     urlencode_postdata,
     unified_timestamp,
     ExtractorError,
@@ -1385,7 +1385,7 @@ class AdobePassIE(InfoExtractor):
             if not redirect_url:
                 return None
             if url:
-                redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+                redirect_url = compat_urlparse.urljoin(url, unescape_html(redirect_url))
             return redirect_url
 
         mvpd_headers = {
@@ -1520,7 +1520,7 @@ class AdobePassIE(InfoExtractor):
                         self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                         count += 1
                         continue
-                    authn_token = unescapeHTML(xml_text(session, 'authnToken'))
+                    authn_token = unescape_html(xml_text(session, 'authnToken'))
                     requestor_info['authn_token'] = authn_token
                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
 
@@ -1543,7 +1543,7 @@ class AdobePassIE(InfoExtractor):
                     continue
                 if '<error' in authorize:
                     raise ExtractorError(xml_text(authorize, 'details'), expected=True)
-                authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
+                authz_token = unescape_html(xml_text(authorize, 'authzToken'))
                 requestor_info[guid] = authz_token
                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
 
@@ -6,7 +6,7 @@ from .theplatform import ThePlatformIE
 from ..utils import (
     smuggle_url,
     update_url_query,
-    unescapeHTML,
+    unescape_html,
     extract_attributes,
     get_element_by_attribute,
 )
@@ -219,7 +219,7 @@ class HistoryTopicIE(AENetworksBaseIE):
         if video_display_id:
             webpage = self._download_webpage(url, video_display_id)
             release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
-            release_url = unescapeHTML(release_url)
+            release_url = unescape_html(release_url)
 
             return self.theplatform_url_result(
                 release_url, video_id, {
@@ -17,7 +17,7 @@ from ..utils import (
     intlist_to_bytes,
     int_or_none,
     strip_jsonp,
-    unescapeHTML,
+    unescape_html,
     unsmuggle_url,
 )
 
@@ -272,7 +272,7 @@ class AnvatoIE(InfoExtractor):
         entries = []
         for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
             anvplayer_data = ie._parse_json(
-                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+                mobj.group('anvp'), video_id, transform_source=unescape_html,
                 fatal=False)
             if not anvplayer_data:
                 continue
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -27,7 +27,7 @@ class ATVAtIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        video_data = self._parse_json(unescapeHTML(self._search_regex(
+        video_data = self._parse_json(unescape_html(self._search_regex(
             [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
              r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
             webpage, 'player data', group='json')),
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import unescape_html
 
 
 class BaiduVideoIE(InfoExtractor):
@@ -43,7 +43,7 @@ class BaiduVideoIE(InfoExtractor):
             'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
 
         playlist_title = playlist_detail['title']
-        playlist_description = unescapeHTML(playlist_detail.get('intro'))
+        playlist_description = unescape_html(playlist_detail.get('intro'))
 
         episodes_detail = self._call_api(
             'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
@@ -17,7 +17,7 @@ from ..utils import (
     parse_filesize,
     str_or_none,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     unified_strdate,
     unified_timestamp,
@@ -141,7 +141,7 @@ class BandcampIE(InfoExtractor):
             self._search_regex(
                 r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
                 'blob', group='blob'),
-            track_id, transform_source=unescapeHTML)
+            track_id, transform_source=unescape_html)
 
         info = try_get(
             blob, (lambda x: x['digital_items'][0],
@@ -359,7 +359,7 @@ class BandcampWeeklyIE(InfoExtractor):
             self._search_regex(
                 r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
                 'blob', group='blob'),
-            video_id, transform_source=unescapeHTML)
+            video_id, transform_source=unescape_html)
 
         show = blob['bcw_show']
 
@@ -16,7 +16,7 @@ from ..utils import (
     parse_duration,
     parse_iso8601,
     try_get,
-    unescapeHTML,
+    unescape_html,
     urlencode_postdata,
     urljoin,
 )
@@ -895,7 +895,7 @@ class BBCIE(BBCCoUkIE):
         if data_playables:
             for _, data_playable_json in data_playables:
                 data_playable = self._parse_json(
-                    unescapeHTML(data_playable_json), playlist_id, fatal=False)
+                    unescape_html(data_playable_json), playlist_id, fatal=False)
                 if not data_playable:
                     continue
                 settings = data_playable.get('settings', {})
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -32,8 +32,8 @@ class BildIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': unescapeHTML(video_data['title']).strip(),
-            'description': unescapeHTML(video_data.get('description')),
+            'title': unescape_html(video_data['title']).strip(),
+            'description': unescape_html(video_data.get('description')),
             'url': video_data['clipList'][0]['srces'][0]['src'],
             'thumbnail': video_data.get('poster'),
             'duration': int_or_none(video_data.get('durationSec')),
@@ -6,7 +6,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    orderedSet,
+    ordered_set,
     urlencode_postdata,
 )
 
@@ -54,7 +54,7 @@ class BitChuteIE(InfoExtractor):
 
         formats = [
             {'url': format_url}
-            for format_url in orderedSet(format_urls)]
+            for format_url in ordered_set(format_urls)]
         self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
@@ -27,7 +27,7 @@ from ..utils import (
     js_to_json,
     int_or_none,
     parse_iso8601,
-    unescapeHTML,
+    unescape_html,
     unsmuggle_url,
     update_url_query,
     clean_html,
@@ -259,7 +259,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                     content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
             ''', webpage)
         if url_m:
-            url = unescapeHTML(url_m.group('url'))
+            url = unescape_html(url_m.group('url'))
             # Some sites don't add it, we can't download with this url, for example:
             # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
             if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
@@ -17,7 +17,7 @@ from ..utils import (
     xpath_element,
     xpath_with_ns,
     find_xpath_attr,
-    orderedSet,
+    ordered_set,
     parse_duration,
     parse_iso8601,
     parse_age_limit,
@@ -145,7 +145,7 @@ class CBCIE(InfoExtractor):
             media_ids.extend(re.findall(media_id_re, webpage))
         entries.extend([
             self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-            for media_id in orderedSet(media_ids)])
+            for media_id in ordered_set(media_ids)])
         return self.playlist_result(
             entries, display_id, strip_or_none(title),
             self._og_search_description(webpage))
@@ -12,7 +12,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     urlencode_postdata,
     USER_AGENTS,
@@ -277,7 +277,7 @@ class CeskaTelevizePoradyIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        data_url = update_url_query(unescapeHTML(self._search_regex(
+        data_url = update_url_query(unescape_html(self._search_regex(
             (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
              r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
             webpage, 'iframe player url', group='url')), query={
@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     qualities,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -108,7 +108,7 @@ class Channel9IE(InfoExtractor):
         episode_data = self._search_regex(
             r"data-episode='([^']+)'", webpage, 'episode data', default=None)
         if episode_data:
-            episode_data = self._parse_json(unescapeHTML(
+            episode_data = self._parse_json(unescape_html(
                 episode_data), content_path)
             content_id = episode_data['contentId']
             is_session = '/Sessions(' in episode_data['api']
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -36,7 +36,7 @@ class CJSWIE(InfoExtractor):
 
         webpage = self._download_webpage(url, episode_id)
 
-        title = unescapeHTML(self._search_regex(
+        title = unescape_html(self._search_regex(
             (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
              r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
             webpage, 'title', group='title'))
@@ -54,7 +54,7 @@ from ..utils import (
     js_to_json,
     JSON_LD_RE,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     parse_codecs,
     parse_duration,
     parse_iso8601,
@@ -62,7 +62,7 @@ from ..utils import (
     RegexNotFoundError,
     sanitized_Request,
     sanitize_filename,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unified_timestamp,
     update_Request,
@@ -932,7 +932,7 @@ class InfoExtractor(object):
         return video_info
 
     def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
-        urls = orderedSet(
+        urls = ordered_set(
             self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
             for m in matches)
         return self.playlist_result(
@@ -1083,7 +1083,7 @@ class InfoExtractor(object):
         escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
         if escaped is None:
             return None
-        return unescapeHTML(escaped)
+        return unescape_html(escaped)
 
     def _og_search_thumbnail(self, html, **kargs):
         return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
@@ -1220,8 +1220,8 @@ class InfoExtractor(object):
             assert e['@type'] == 'VideoObject'
             info.update({
                 'url': url_or_none(e.get('contentUrl')),
-                'title': unescapeHTML(e.get('name')),
-                'description': unescapeHTML(e.get('description')),
+                'title': unescape_html(e.get('name')),
+                'description': unescape_html(e.get('description')),
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
@@ -1239,11 +1239,11 @@ class InfoExtractor(object):
                 if expected_type is not None and expected_type != item_type:
                     return info
                 if item_type in ('TVEpisode', 'Episode'):
-                    episode_name = unescapeHTML(e.get('name'))
+                    episode_name = unescape_html(e.get('name'))
                     info.update({
                         'episode': episode_name,
                         'episode_number': int_or_none(e.get('episodeNumber')),
-                        'description': unescapeHTML(e.get('description')),
+                        'description': unescape_html(e.get('description')),
                     })
                     if not info.get('title') and episode_name:
                         info['title'] = episode_name
@@ -1252,19 +1252,19 @@ class InfoExtractor(object):
                         info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                     part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                     if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
-                        info['series'] = unescapeHTML(part_of_series.get('name'))
+                        info['series'] = unescape_html(part_of_series.get('name'))
                 elif item_type == 'Movie':
                     info.update({
-                        'title': unescapeHTML(e.get('name')),
-                        'description': unescapeHTML(e.get('description')),
+                        'title': unescape_html(e.get('name')),
+                        'description': unescape_html(e.get('description')),
                         'duration': parse_duration(e.get('duration')),
                         'timestamp': unified_timestamp(e.get('dateCreated')),
                     })
                 elif item_type in ('Article', 'NewsArticle'):
                     info.update({
                         'timestamp': parse_iso8601(e.get('datePublished')),
-                        'title': unescapeHTML(e.get('headline')),
-                        'description': unescapeHTML(e.get('articleBody')),
+                        'title': unescape_html(e.get('headline')),
+                        'description': unescape_html(e.get('articleBody')),
                     })
                 elif item_type == 'VideoObject':
                     extract_video_object(e)
@@ -2628,7 +2628,7 @@ class InfoExtractor(object):
 
             entry = {
                 'id': this_video_id,
-                'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
+                'title': unescape_html(video_data['title'] if require_title else video_data.get('title')),
                 'description': video_data.get('description'),
                 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
                 'timestamp': int_or_none(video_data.get('pubdate')),
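
The hunks above also rename orderedSet to ordered_set. The helper deduplicates an iterable while preserving first-seen order; a sketch, assuming the renamed utils module:

from youtube_dl.utils import ordered_set

# Duplicates are dropped, first-seen order is kept, and a list is returned.
assert ordered_set([2, 1, 2, 3, 1]) == [2, 1, 3]
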
@@ -14,7 +14,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     parse_iso8601,
 )
 
@@ -111,7 +111,7 @@ class CondeNastIE(InfoExtractor):
         base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
         m_paths = re.finditer(
             r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
-        paths = orderedSet(m.group(1) for m in m_paths)
+        paths = ordered_set(m.group(1) for m in m_paths)
         build_url = lambda path: compat_urlparse.urljoin(base_url, path)
         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
         return self.playlist_result(entries, playlist_title=title)
@@ -11,7 +11,7 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
 )
 from .senateisvp import SenateISVPIE
 from .ustream import UstreamIE
@@ -154,12 +154,12 @@ class CSpanIE(InfoExtractor):
             for quality in f.get('qualities', []):
                 formats.append({
                     'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
-                    'url': unescapeHTML(get_text_attr(quality, 'file')),
+                    'url': unescape_html(get_text_attr(quality, 'file')),
                     'height': int_or_none(get_text_attr(quality, 'height')),
                     'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
                 })
             if not formats:
-                path = unescapeHTML(get_text_attr(f, 'path'))
+                path = unescape_html(get_text_attr(f, 'path'))
                 if not path:
                     continue
                 formats = self._extract_m3u8_formats(
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import orderedSet
+from ..utils import ordered_set
 
 
 class CTVNewsIE(InfoExtractor):
@@ -63,6 +63,6 @@ class CTVNewsIE(InfoExtractor):
                 'ot': 'example.AjaxPageLayout.ot',
                 'maxItemsPerPage': 1000000,
             })
-            entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
+            entries = [ninecninemedia_url_result(clip_id) for clip_id in ordered_set(
                 re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
         return self.playlist_result(entries, page_id)
@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     determine_protocol,
     try_get,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -40,7 +40,7 @@ class DailyMailIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         video_data = self._parse_json(self._search_regex(
             r"data-opts='({.+?})'", webpage, 'video data'), video_id)
-        title = unescapeHTML(video_data['title'])
+        title = unescape_html(video_data['title'])
 
         sources_url = (try_get(
             video_data,
@@ -78,7 +78,7 @@ class DailyMailIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'description': unescapeHTML(video_data.get('descr')),
+            'description': unescape_html(video_data.get('descr')),
             'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
             'formats': formats,
         }
@@ -23,7 +23,7 @@ from ..utils import (
     sanitized_Request,
     str_to_int,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     url_or_none,
     urlencode_postdata,
@@ -140,7 +140,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         # Look for embedded Dailymotion player
         matches = re.findall(
             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
-        return list(map(lambda m: unescapeHTML(m[1]), matches))
+        return list(map(lambda m: unescape_html(m[1]), matches))
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -481,7 +481,7 @@ class DailymotionUserIE(DailymotionBaseInfoExtractor):
         user = mobj.group('user')
         webpage = self._download_webpage(
             'https://www.dailymotion.com/user/%s' % user, user)
-        full_user = unescapeHTML(self._html_search_regex(
+        full_user = unescape_html(self._html_search_regex(
             r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
             webpage, 'user'))
 
@@ -16,7 +16,7 @@ from ..utils import (
     int_or_none,
     str_to_int,
     xpath_text,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -162,7 +162,7 @@ class DaumClipIE(InfoExtractor):
             '_type': 'url_transparent',
             'id': video_id,
             'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
-            'title': unescapeHTML(clip_info['title']),
+            'title': unescape_html(clip_info['title']),
             'thumbnail': clip_info.get('thumb_url'),
             'description': clip_info.get('contents'),
             'duration': int_or_none(clip_info.get('duration')),
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
-    orderedSet,
+    ordered_set,
 )
 
 
@@ -70,7 +70,7 @@ class DeezerPlaylistIE(InfoExtractor):
             }]
             self._sort_formats(formats)
             artists = ', '.join(
-                orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
+                ordered_set(a['ART_NAME'] for a in s['ARTISTS']))
             entries.append({
                 'id': s['SNG_ID'],
                 'duration': int_or_none(s.get('DURATION')),
@@ -10,7 +10,7 @@ from ..utils import (
     int_or_none,
     parse_age_limit,
     remove_end,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
 )
 
@@ -158,7 +158,7 @@ class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
         for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
            data = self._parse_json(
                 mobj.group('json'), display_id,
-                transform_source=unescapeHTML, fatal=False)
+                transform_source=unescape_html, fatal=False)
             if not isinstance(data, dict) or data.get('type') != 'episode':
                 continue
             episode_url = data.get('socialUrl')
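
DiscoveryGo (and several extractors below) pass unescape_html as transform_source, so HTML-escaped JSON embedded in a data-* attribute is unescaped before parsing. The equivalent in plain code, with a hypothetical payload:

import json

from youtube_dl.utils import unescape_html

raw = '{&quot;type&quot;: &quot;episode&quot;}'  # hypothetical attribute value
data = json.loads(unescape_html(raw))
assert data == {'type': 'episode'}
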
@@ -8,7 +8,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     urljoin,
 )
@@ -105,7 +105,7 @@ class DouyuTVIE(InfoExtractor):
                 'aid': 'pcclient'
             })['data']['live_url']
 
-        title = self._live_title(unescapeHTML(room['room_name']))
+        title = self._live_title(unescape_html(room['room_name']))
         description = room.get('show_details')
         thumbnail = room.get('room_src')
         uploader = room.get('nickname')
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     js_to_json,
     parse_duration,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -41,7 +41,7 @@ class DRBonanzaIE(InfoExtractor):
                 r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
             display_id, transform_source=js_to_json)
 
-        title = unescapeHTML(asset['AssetTitle']).strip()
+        title = unescape_html(asset['AssetTitle']).strip()
 
         def extract(field):
             return self._search_regex(
@@ -10,7 +10,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     mimetype2ext,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -99,7 +99,7 @@ class DVTVIE(InfoExtractor):
             data.update(self._parse_json(
                 live_js, video_id, transform_source=js_to_json))
 
-        title = unescapeHTML(data['title'])
+        title = unescape_html(data['title'])
 
         formats = []
         for video in data['sources']:
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlencode
 from ..utils import (
     ExtractorError,
-    unescapeHTML
+    unescape_html
 )
 
 
@@ -77,7 +77,7 @@ class EroProfileIE(InfoExtractor):
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             webpage, 'video id', default=None)
 
-        video_url = unescapeHTML(self._search_regex(
+        video_url = unescape_html(self._search_regex(
             r'<source src="([^"]+)', webpage, 'video url'))
         title = self._html_search_regex(
             r'Title:</th><td>([^<]+)</td>', webpage, 'title')
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
-    orderedSet,
+    ordered_set,
     parse_duration,
     qualities,
     unified_strdate,
@@ -56,7 +56,7 @@ class EuropaIE(InfoExtractor):
         query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
         preferred_lang = query.get('sitelang', ('en', ))[0]
 
-        preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
+        preferred_langs = ordered_set((preferred_lang, 'en', 'int'))
 
         title = get_item('title', preferred_langs) or video_id
         description = get_item('description', preferred_langs)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
-    unescapeHTML,
+    unescape_html,
     unified_timestamp,
 )
 
@@ -61,7 +61,7 @@ class ExpressenIE(InfoExtractor):
             self._search_regex(
                 r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
                 webpage, 'info', group='value'),
-            display_id, transform_source=unescapeHTML)
+            display_id, transform_source=unescape_html)
 
         info = extract_data('video-tracking-info')
         video_id = info['videoId']
@@ -7,7 +7,7 @@ from ..compat import (
     compat_urllib_parse_unquote,
 )
 from ..utils import (
-    unescapeHTML,
+    unescape_html,
     url_basename,
     dict_get,
 )
@@ -51,7 +51,7 @@ class GameSpotIE(OnceIE):
         webpage = self._download_webpage(url, page_id)
         data_video_json = self._search_regex(
             r'data-video=["\'](.*?)["\']', webpage, 'data video')
-        data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
+        data_video = self._parse_json(unescape_html(data_video_json), page_id)
         streams = data_video['videoStreams']
 
         manifest_url = None
@@ -111,7 +111,7 @@ class GameSpotIE(OnceIE):
         onceux_json = self._search_regex(
             r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
         if onceux_json:
-            onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
+            onceux_url = self._parse_json(unescape_html(onceux_json), page_id).get('metadataUri')
             if onceux_url:
                 formats.extend(self._extract_once_formats(re.sub(
                     r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
@@ -25,10 +25,10 @@ from ..utils import (
     KNOWN_EXTENSIONS,
     merge_dicts,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     sanitized_Request,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unsmuggle_url,
     UnsupportedError,
@@ -2486,7 +2486,7 @@ class GenericIE(InfoExtractor):
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
         if m:
             playlists = re.findall(
-                r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+                r'list\[\]=/playlist/([^/]+)/', unescape_html(m.group('url')))
             if playlists:
                 return self.playlist_from_matches(
                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
@@ -2515,7 +2515,7 @@ class GenericIE(InfoExtractor):
         # Look for Bandcamp pages with custom domain
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:
-            burl = unescapeHTML(mobj.group(1))
+            burl = unescape_html(mobj.group(1))
             # Don't set the extractor because it can be a track url or an album
             return self.url_result(burl)
 
@@ -2631,7 +2631,7 @@ class GenericIE(InfoExtractor):
         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
         if matches:
             return self.playlist_from_matches(
-                matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
+                matches, video_id, video_title, getter=unescape_html, ie='FunnyOrDie')
 
         # Look for BBC iPlayer embed
         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
@@ -2727,7 +2727,7 @@ class GenericIE(InfoExtractor):
         # Look for embedded soundcloud player
         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
         if soundcloud_urls:
-            return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+            return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescape_html, ie=SoundcloudIE.ie_key())
 
         # Look for tunein player
         tunein_urls = TuneInBaseIE._extract_urls(webpage)
@@ -2918,7 +2918,7 @@ class GenericIE(InfoExtractor):
             webpage)
         if mobj is not None:
             return self.url_result(
-                self._proto_relative_url(unescapeHTML(mobj.group(1))),
+                self._proto_relative_url(unescape_html(mobj.group(1))),
                 'AdobeTVVideo')
 
         # Look for Vine embeds
@@ -2927,7 +2927,7 @@ class GenericIE(InfoExtractor):
             webpage)
         if mobj is not None:
             return self.url_result(
-                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+                self._proto_relative_url(unescape_html(mobj.group(1))), 'Vine')
 
         # Look for VODPlatform embeds
         mobj = re.search(
@@ -2935,7 +2935,7 @@ class GenericIE(InfoExtractor):
             webpage)
         if mobj is not None:
             return self.url_result(
-                self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+                self._proto_relative_url(unescape_html(mobj.group('url'))), 'VODPlatform')
 
         # Look for Mangomolo embeds
         mobj = re.search(
@@ -2947,7 +2947,7 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             info = {
                 '_type': 'url_transparent',
-                'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+                'url': self._proto_relative_url(unescape_html(mobj.group('url'))),
                 'title': video_title,
                 'description': video_description,
                 'thumbnail': video_thumbnail,
@@ -3298,7 +3298,7 @@ class GenericIE(InfoExtractor):
                 refresh_header = refresh_header.decode('iso-8859-1')
             found = re.search(REDIRECT_REGEX, refresh_header)
         if found:
-            new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+            new_url = compat_urlparse.urljoin(url, unescape_html(found.group(1)))
             if new_url != url:
                 self.report_following_redirect(new_url)
                 return {
@@ -3320,8 +3320,8 @@ class GenericIE(InfoExtractor):
             raise UnsupportedError(url)
 
         entries = []
-        for video_url in orderedSet(found):
-            video_url = unescapeHTML(video_url)
+        for video_url in ordered_set(found):
+            video_url = unescape_html(video_url)
             video_url = video_url.replace('\\/', '/')
             video_url = compat_urlparse.urljoin(url, video_url)
             video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
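
GenericIE repeatedly combines the two renamed helpers: scrape candidate URLs, deduplicate them with ordered_set, then unescape each with unescape_html. A standalone sketch of that idiom over a hypothetical page:

import re

from youtube_dl.utils import ordered_set, unescape_html

webpage = ('<a href="http://example.com/v?a=1&amp;b=2">x</a>'
           '<a href="http://example.com/v?a=1&amp;b=2">y</a>')
urls = [unescape_html(u)
        for u in ordered_set(re.findall(r'href="([^"]+)"', webpage))]
assert urls == ['http://example.com/v?a=1&b=2']
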
@@ -8,7 +8,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     qualities,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -39,7 +39,7 @@ class GiantBombIE(InfoExtractor):
         description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
 
-        video = json.loads(unescapeHTML(self._search_regex(
+        video = json.loads(unescape_html(self._search_regex(
             r'data-video="([^"]+)"', webpage, 'data-video')))
 
         duration = int_or_none(video.get('lengthSeconds'))
@@ -16,7 +16,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
-    orderedSet,
+    ordered_set,
     str_or_none,
 )
 
@@ -218,7 +218,7 @@ class GloboArticleIE(InfoExtractor):
             video_ids.extend(re.findall(video_regex, webpage))
         entries = [
             self.url_result('globo:%s' % video_id, GloboIE.ie_key())
-            for video_id in orderedSet(video_ids)]
+            for video_id in ordered_set(video_ids)]
         title = self._og_search_title(webpage, fatal=False)
         description = self._html_search_meta('description', webpage)
         return self.playlist_result(entries, display_id, title, description)
@@ -5,7 +5,7 @@ from ..utils import (
     find_xpath_attr,
     int_or_none,
     js_to_json,
-    unescapeHTML,
+    unescape_html,
     determine_ext,
 )
 
@@ -82,8 +82,8 @@ class HowStuffWorksIE(InfoExtractor):
         return {
             'id': '%s' % video_id,
             'display_id': display_id,
-            'title': unescapeHTML(clip_info['clip_title']),
-            'description': unescapeHTML(clip_info.get('caption')),
+            'title': unescape_html(clip_info['clip_title']),
+            'description': unescape_html(clip_info.get('caption')),
             'thumbnail': clip_info.get('video_still_url'),
             'duration': int_or_none(clip_info.get('duration')),
             'formats': formats,
@@ -18,7 +18,7 @@ from ..utils import (
     encode_data_uri,
     ExtractorError,
     int_or_none,
-    orderedSet,
+    ordered_set,
     parse_iso8601,
     str_or_none,
     url_basename,
@@ -243,7 +243,7 @@ class LePlaylistIE(InfoExtractor):
         page = self._download_webpage(url, playlist_id)
 
         # Currently old domain names are still used in playlists
-        media_ids = orderedSet(re.findall(
+        media_ids = ordered_set(re.findall(
             r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
         entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
                    for media_id in media_ids]
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    unescapeHTML,
+    unescape_html,
     parse_duration,
     get_element_by_class,
 )
@@ -68,7 +68,7 @@ class LEGOIE(InfoExtractor):
             default='http://www.lego.com/%s/mediaplayer/video/' % locale))
         player_url = base_url + video_id
         player_webpage = self._download_webpage(player_url, video_id)
-        video_data = self._parse_json(unescapeHTML(self._search_regex(
+        video_data = self._parse_json(unescape_html(self._search_regex(
             r"video='([^']+)'", player_webpage, 'video data')), video_id)
         progressive_base = self._search_regex(
             r'data-video-progressive-url="([^"]+)"',
@@ -13,7 +13,7 @@ from ..utils import (
     xpath_attr,
     xpath_with_ns,
     xpath_text,
-    orderedSet,
+    ordered_set,
     update_url_query,
     int_or_none,
     float_or_none,
@@ -305,7 +305,7 @@ class LivestreamOriginalIE(InfoExtractor):
 
     def _extract_folder(self, url, folder_id):
         webpage = self._download_webpage(url, folder_id)
-        paths = orderedSet(re.findall(
+        paths = ordered_set(re.findall(
             r'''(?x)(?:
                 <li\s+class="folder">\s*<a\s+href="|
                 <a\s+href="(?=https?://livestre\.am/)
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
-    orderedSet,
+    ordered_set,
     parse_duration,
     try_get,
 )
@@ -118,7 +118,7 @@ class MarkizaPageIE(InfoExtractor):
 
         entries = [
             self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
-            for video_id in orderedSet(re.findall(
+            for video_id in ordered_set(re.findall(
                 r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
                 webpage))]
 
@@ -13,7 +13,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     mimetype2ext,
-    unescapeHTML,
+    unescape_html,
     unsmuggle_url,
     url_or_none,
     urljoin,
@@ -107,7 +107,7 @@ class MediasiteIE(InfoExtractor):
     @staticmethod
     def _extract_urls(webpage):
         return [
-            unescapeHTML(mobj.group('url'))
+            unescape_html(mobj.group('url'))
             for mobj in re.finditer(
                 r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
                 webpage)]
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from .pladform import PladformIE
 from ..utils import (
-    unescapeHTML,
+    unescape_html,
     int_or_none,
     ExtractorError,
 )
@@ -46,7 +46,7 @@ class METAIE(InfoExtractor):
             json_str = ''
             for i in range(0, len(st_html5), 3):
                 json_str += '&#%s;' % st_html5[i:i + 3]
-            uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
+            uppod_data = self._parse_json(unescape_html(json_str), video_id)
             error = uppod_data.get('customnotfound')
             if error:
                 raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
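
The META hunk rebuilds a JSON string from three-digit character codes by forming numeric HTML entities and unescaping them. The same trick in isolation, with hypothetical input:

from youtube_dl.utils import unescape_html

st_html5 = '123034125'  # hypothetical triplets for '{', '"', '}'
json_str = ''
for i in range(0, len(st_html5), 3):
    json_str += '&#%s;' % st_html5[i:i + 3]
assert unescape_html(json_str) == '{"}'
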
@@ -8,7 +8,7 @@ from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
     InAdvancePagedList,
-    orderedSet,
+    ordered_set,
     str_to_int,
     unified_strdate,
 )
@@ -169,7 +169,7 @@ class MotherlessGroupIE(InfoExtractor):
             self.url_result(
                 compat_urlparse.urljoin(base, '/' + entry_id),
                 ie=MotherlessIE.ie_key(), video_id=entry_id)
-            for entry_id in orderedSet(re.findall(
+            for entry_id in ordered_set(re.findall(
                 r'data-codename=["\']([A-Z0-9]+)', webpage))]
         return entries
 
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    unescapeHTML,
+    unescape_html,
     parse_duration,
 )
 
@@ -36,8 +36,8 @@ class MovingImageIE(InfoExtractor):
                 r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
                 webpage, 'title', fatal=fatal)
 
-        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
-        description = unescapeHTML(search_field('Description'))
+        title = unescape_html(search_field('Title', fatal=True)).strip('()[]')
+        description = unescape_html(search_field('Description'))
         duration = parse_duration(search_field('Running time'))
         thumbnail = self._search_regex(
             r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
@@ -9,7 +9,7 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -53,10 +53,10 @@ class MSNIE(InfoExtractor):
             self._search_regex(
                 r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
                 webpage, 'video data', default='{}', group='data'),
-            display_id, transform_source=unescapeHTML)
+            display_id, transform_source=unescape_html)
 
         if not video:
-            error = unescapeHTML(self._search_regex(
+            error = unescape_html(self._search_regex(
                 r'data-error=(["\'])(?P<error>.+?)\1',
                 webpage, 'error', group='error'))
             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
@@ -18,7 +18,7 @@ from ..utils import (
     strip_or_none,
     timeconvert,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     url_basename,
     xpath_text,
@@ -59,7 +59,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         req.add_header('User-Agent', 'curl/7')
         webpage = self._download_webpage(req, mtvn_id,
             'Downloading mobile page')
-        metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
+        metrics_url = unescape_html(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
         req = HEADRequest(metrics_url)
         response = self._request_webpage(req, mtvn_id, 'Resolving url')
         url = response.geturl()
@@ -5,7 +5,7 @@ import re
 
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
-from ..utils import unescapeHTML
+from ..utils import unescape_html
 
 
 class NintendoIE(InfoExtractor):
@@ -43,4 +43,4 @@ class NintendoIE(InfoExtractor):
             webpage)]
 
         return self.playlist_result(
-            entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False)))
+            entries, page_id, unescape_html(self._og_search_title(webpage, fatal=False)))
@@ -12,7 +12,7 @@ from ..utils import (
     ExtractorError,
     fix_xml_ampersands,
     int_or_none,
-    orderedSet,
+    ordered_set,
     parse_duration,
     qualities,
     strip_jsonp,
@@ -546,7 +546,7 @@ class NPOPlaylistBaseIE(NPOIE):
 
         entries = [
             self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
-            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
+            for video_id in ordered_set(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
         ]
 
         playlist_title = self._html_search_regex(
@@ -13,7 +13,7 @@ from ..utils import (
     unified_strdate,
     int_or_none,
     qualities,
-    unescapeHTML,
+    unescape_html,
     urlencode_postdata,
 )
 
@@ -137,7 +137,7 @@ class OdnoklassnikiIE(InfoExtractor):
             raise ExtractorError(error, expected=True)
 
         player = self._parse_json(
-            unescapeHTML(self._search_regex(
+            unescape_html(self._search_regex(
                 r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
                 webpage, 'player', group='player')),
             video_id)
@@ -15,7 +15,7 @@ from ..compat import (
 from ..utils import (
     check_executable,
     determine_ext,
-    encodeArgument,
+    encode_argument,
     ExtractorError,
     get_element_by_id,
     get_exe_version,
@@ -233,13 +233,13 @@ class PhantomJSwrapper(object):
         out, err = p.communicate()
         if p.returncode != 0:
             raise ExtractorError(
-                'Executing JS failed\n:' + encodeArgument(err))
+                'Executing JS failed\n:' + encode_argument(err))
         with open(self._TMP_FILES['html'].name, 'rb') as f:
             html = f.read().decode('utf-8')
 
         self._load_cookies()
 
-        return (html, encodeArgument(out))
+        return (html, encode_argument(out))
 
 
 class OpenloadIE(InfoExtractor):
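
encodeArgument becomes encode_argument here; the helper encodes a command-line argument for subprocess on Python 2 and is, to my understanding, effectively a pass-through on Python 3. A hedged usage sketch:

import subprocess
import sys

from youtube_dl.utils import encode_argument

# Pass an argument safely to a child process regardless of Python version.
p = subprocess.Popen(
    [sys.executable, '-c', encode_argument("print('ok')")],
    stdout=subprocess.PIPE)
out, _ = p.communicate()
assert out.strip() == b'ok'
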
@@ -7,7 +7,7 @@ from ..compat import compat_urlparse
 from ..utils import (
     get_element_by_attribute,
     qualities,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -66,7 +66,7 @@ class OraTVIE(InfoExtractor):
             'id': self._search_regex(
                 r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id),
             'display_id': display_id,
-            'title': unescapeHTML(self._og_search_title(webpage)),
+            'title': unescape_html(self._og_search_title(webpage)),
             'description': get_element_by_attribute(
                 'class', 'video_txt_decription', webpage),
             'thumbnail': self._proto_relative_url(self._search_regex(
@@ -10,10 +10,10 @@ from ..utils import (
     float_or_none,
     HEADRequest,
     int_or_none,
-    orderedSet,
+    ordered_set,
     remove_end,
     strip_jsonp,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     url_or_none,
 )
@@ -67,7 +67,7 @@ class ORFTVthekIE(InfoExtractor):
             self._search_regex(
                 r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
                 webpage, 'playlist', group='json'),
-            playlist_id, transform_source=unescapeHTML)['playlist']['videos']
+            playlist_id, transform_source=unescape_html)['playlist']['videos']
 
         entries = []
         for sd in data_jsb:
@@ -357,7 +357,7 @@ class ORFFM4StoryIE(InfoExtractor):
         webpage = self._download_webpage(url, story_id)
 
         entries = []
-        all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+        all_ids = ordered_set(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
         for idx, video_id in enumerate(all_ids):
             data = self._download_json(
                 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
@@ -11,7 +11,7 @@ from ..utils import (
     int_or_none,
     float_or_none,
     js_to_json,
-    orderedSet,
+    ordered_set,
     strip_jsonp,
     strip_or_none,
     unified_strdate,
@@ -458,7 +458,7 @@ class PBSIE(InfoExtractor):
             r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
         )
         for p in MULTI_PART_REGEXES:
-            tabbed_videos = orderedSet(re.findall(p, webpage))
+            tabbed_videos = ordered_set(re.findall(p, webpage))
             if tabbed_videos:
                 return tabbed_videos, presumptive_id, upload_date, description
 
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -129,7 +129,7 @@ class PeriscopeUserIE(PeriscopeBaseIE):
         webpage = self._download_webpage(url, user_name)
 
         data_store = self._parse_json(
-            unescapeHTML(self._search_regex(
+            unescape_html(self._search_regex(
                 r'data-store=(["\'])(?P<data>.+?)\1',
                 webpage, 'data store', default='{}', group='data')),
             user_name)
@@ -9,7 +9,7 @@ from ..utils import (
     ExtractorError,
     dict_get,
     int_or_none,
-    unescapeHTML,
+    unescape_html,
     parse_iso8601,
 )
 
@@ -103,7 +103,7 @@ class PikselIE(InfoExtractor):
 
             formats.append({
                 'format_id': '-'.join(format_id),
-                'url': unescapeHTML(http_url),
+                'url': unescape_html(http_url),
                 'vbr': vbr,
                 'abr': abr,
                 'width': int_or_none(asset_file.get('videoWidth')),
@@ -15,7 +15,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     js_to_json,
-    orderedSet,
+    ordered_set,
     remove_quotes,
     str_to_int,
     url_or_none,
@@ -320,7 +320,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
             self.url_result(
                 'http://www.%s/%s' % (host, video_url),
                 PornHubIE.ie_key(), video_title=title)
-            for video_url, title in orderedSet(re.findall(
+            for video_url, title in ordered_set(re.findall(
                 r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
                 container))
         ]
@@ -10,7 +10,7 @@ from ..utils import (
     clean_html,
     ExtractorError,
     strip_jsonp,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -365,5 +365,5 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
             for song in cdlist['songlist']]
 
         list_name = cdlist.get('dissname')
-        list_description = clean_html(unescapeHTML(cdlist.get('desc')))
+        list_description = clean_html(unescape_html(cdlist.get('desc')))
         return self.playlist_result(entries, list_id, list_name, list_description)
@@ -17,7 +17,7 @@ from ..utils import (
     parse_duration,
     strip_or_none,
     try_get,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unified_timestamp,
     update_url_query,
@@ -272,7 +272,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
 
         title = self._html_search_meta(
             ('programma', 'nomeProgramma'), webpage, 'title')
-        description = unescapeHTML(self._html_search_meta(
+        description = unescape_html(self._html_search_meta(
             ('description', 'og:description'), webpage, 'description'))
 
         entries = []
@@ -10,7 +10,7 @@ from ..utils import (
     int_or_none,
     merge_dicts,
     try_get,
-    unescapeHTML,
+    unescape_html,
     unified_timestamp,
     urljoin,
 )
@@ -99,7 +99,7 @@ class RayWenderlichIE(InfoExtractor):
             self._search_regex(
                 r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
                 'data collection', default='{}', group='data'),
-            display_id, transform_source=unescapeHTML, fatal=False)
+            display_id, transform_source=unescape_html, fatal=False)
         video_id = self._extract_video_id(
             data, lesson_id) or self._search_regex(
             r'/videos/(\d+)/', thumbnail, 'video id')
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     js_to_json,
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )
 
 
@@ -34,7 +34,7 @@ class ReutersIE(InfoExtractor):
         def get_json_value(key, fatal=False):
             return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
 
-        title = unescapeHTML(get_json_value('title', fatal=True))
+        title = unescape_html(get_json_value('title', fatal=True))
         mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
 
         mas_data = self._download_json(
@@ -8,7 +8,7 @@ from ..compat import compat_str
 from ..utils import (
     int_or_none,
     parse_iso8601,
-    unescapeHTML,
+    unescape_html,
     qualities,
 )
 
@@ -59,8 +59,8 @@ class Revision3EmbedIE(InfoExtractor):
 
         return {
             'id': playlist_id,
-            'title': unescapeHTML(video_data['title']),
-            'description': unescapeHTML(video_data.get('summary')),
+            'title': unescape_html(video_data['title']),
+            'description': unescape_html(video_data.get('summary')),
             'uploader': video_data.get('show', {}).get('name'),
             'uploader_id': video_data.get('show', {}).get('slug'),
             'duration': int_or_none(video_data.get('duration')),
@@ -120,8 +120,8 @@ class Revision3IE(InfoExtractor):
         info = {
             'id': page_id,
             'display_id': display_id,
-            'title': unescapeHTML(page_data['name']),
-            'description': unescapeHTML(page_data.get('summary')),
+            'title': unescape_html(page_data['name']),
+            'description': unescape_html(page_data.get('summary')),
             'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
             'author': page_data.get('author'),
             'uploader': show_data.get('name'),
@@ -8,7 +8,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     strip_or_none,
-    unescapeHTML,
+    unescape_html,
     urlencode_postdata,
 )

@@ -92,7 +92,7 @@ class RoosterTeethIE(InfoExtractor):

         webpage = self._download_webpage(url, display_id)

-        episode = strip_or_none(unescapeHTML(self._search_regex(
+        episode = strip_or_none(unescape_html(self._search_regex(
             (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
              r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
             default=None, group='title')))
@@ -10,7 +10,7 @@ from ..utils import (
     parse_iso8601,
     str_or_none,
     try_get,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
     ExtractorError,
 )
@@ -48,8 +48,8 @@ class RteBaseIE(InfoExtractor):
                 continue

             if not info_dict:
-                title = unescapeHTML(show['title'])
-                description = unescapeHTML(show.get('description'))
+                title = unescape_html(show['title'])
+                description = unescape_html(show.get('description'))
                 thumbnail = show.get('thumbnail')
                 duration = float_or_none(show.get('duration'), 1000)
                 timestamp = parse_iso8601(show.get('published'))
@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
-    unescapeHTML,
+    unescape_html,
     determine_ext,
 )

@@ -226,5 +226,5 @@ class RTSIE(SRGSSRIE):
             'view_count': int_or_none(info.get('plays')),
             'uploader': info.get('programName'),
             'timestamp': parse_iso8601(info.get('broadcast_date')),
-            'thumbnail': unescapeHTML(info.get('preview_image_url')),
+            'thumbnail': unescape_html(info.get('preview_image_url')),
         }
@@ -10,7 +10,7 @@ from ..utils import (
     update_url_query,
     int_or_none,
     determine_protocol,
-    unescapeHTML,
+    unescape_html,
 )


@@ -95,7 +95,7 @@ class SendtoNewsIE(InfoExtractor):
                 })
             info_dict.update({
                 'title': video['S_headLine'].strip(),
-                'description': unescapeHTML(video.get('S_fullStory')),
+                'description': unescape_html(video.get('S_fullStory')),
                 'thumbnails': thumbnails,
                 'duration': float_or_none(video.get('SM_length')),
                 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
@@ -10,7 +10,7 @@ from ..utils import (
     xpath_attr,
     xpath_text,
     xpath_element,
-    unescapeHTML,
+    unescape_html,
     unified_timestamp,
 )

@@ -70,7 +70,7 @@ class SpringboardPlatformIE(InfoExtractor):
         content = xpath_element(
             item, './{http://search.yahoo.com/mrss/}content', 'content',
             fatal=True)
-        title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
+        title = unescape_html(xpath_text(item, './title', 'title', fatal=True))

         video_url = content.attrib['url']

@@ -84,7 +84,7 @@ class SpringboardPlatformIE(InfoExtractor):
         width = int_or_none(content.get('width'))
         height = int_or_none(content.get('height'))

-        description = unescapeHTML(xpath_text(
+        description = unescape_html(xpath_text(
             item, './description', 'description'))
         thumbnail = xpath_attr(
             item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
@@ -5,8 +5,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    orderedSet,
-    unescapeHTML,
+    ordered_set,
+    unescape_html,
 )


@@ -66,9 +66,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 r'(?s)<description>([^<]+)</description>',
                 coursepage, 'description', fatal=False)

-            links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
+            links = ordered_set(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
             info['entries'] = [self.url_result(
-                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescape_html(l)
             ) for l in links]
             return info
         else:  # Root page
@@ -84,8 +84,8 @@ class StanfordOpenClassroomIE(InfoExtractor):
             rootpage = self._download_webpage(rootURL, info['id'],
                 errnote='Unable to download course info page')

-            links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
+            links = ordered_set(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
             info['entries'] = [self.url_result(
-                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescape_html(l)
             ) for l in links]
             return info
@@ -7,7 +7,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     js_to_json,
-    unescapeHTML,
+    unescape_html,
 )


@@ -59,7 +59,7 @@ class StitcherIE(InfoExtractor):
                 r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
             display_id)['config']['episode']

-        title = unescapeHTML(episode['title'])
+        title = unescape_html(episode['title'])
         formats = [{
             'url': episode[episode_key],
             'ext': determine_ext(episode[episode_key]) or 'mp3',
@@ -12,7 +12,7 @@ from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
-    orderedSet,
+    ordered_set,
     strip_or_none,
     try_get,
     urljoin,
@@ -363,7 +363,7 @@ class SVTPageIE(InfoExtractor):
         entries = [
             self.url_result(
                 'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
-            for video_id in orderedSet(re.findall(
+            for video_id in ordered_set(re.findall(
                 r'data-video-id=["\'](\d+)', webpage))]

         title = strip_or_none(self._og_search_title(webpage, default=None))
@@ -10,7 +10,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     str_to_int,
-    unescapeHTML,
+    unescape_html,
     xpath_text,
 )

@@ -102,7 +102,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):

         def extract_video_url(vl):
             # Any URL modification now results in HTTP Error 403: Forbidden
-            return unescapeHTML(vl.text)
+            return unescape_html(vl.text)

         video_link = cfg_xml.find('./videoLink')
         if video_link is not None:
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )


@@ -46,7 +46,7 @@ class TVN24IE(InfoExtractor):
                 self._search_regex(
                     r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
                     name, group='json', fatal=fatal) or '{}',
-                video_id, transform_source=unescapeHTML, fatal=fatal)
+                video_id, transform_source=unescape_html, fatal=fatal)

         quality_data = extract_json('data-quality', 'formats')

@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
 )

@@ -97,7 +97,7 @@ class TVNetIE(InfoExtractor):
         else:
             is_live = None

-        data_file = unescapeHTML(self._search_regex(
+        data_file = unescape_html(self._search_regex(
             r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
             'data file', group='url'))

@@ -125,7 +125,7 @@ class TVNetIE(InfoExtractor):
             })

         thumbnail = self._og_search_thumbnail(
-            webpage, default=None) or unescapeHTML(
+            webpage, default=None) or unescape_html(
             self._search_regex(
                 r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
                 'thumbnail', default=None, group='url'))
@@ -19,7 +19,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
-    orderedSet,
+    ordered_set,
     parse_duration,
     parse_iso8601,
     qualities,
@@ -389,7 +389,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
                 break
             offset += limit
         return self.playlist_result(
-            [self._make_url_result(entry) for entry in orderedSet(entries)],
+            [self._make_url_result(entry) for entry in ordered_set(entries)],
             channel_id, channel_name)

     def _make_url_result(self, url):
@@ -19,7 +19,7 @@ from ..utils import (
     js_to_json,
     sanitized_Request,
     try_get,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
     urlencode_postdata,
 )
@@ -68,7 +68,7 @@ class UdemyIE(InfoExtractor):

     def _extract_course_info(self, webpage, video_id):
         course = self._parse_json(
-            unescapeHTML(self._search_regex(
+            unescape_html(self._search_regex(
                 r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
                 webpage, 'course', default='{}')),
             video_id, fatal=False) or {}
@@ -80,7 +80,7 @@ class UdemyIE(InfoExtractor):
         def combine_url(base_url, url):
             return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url

-        checkout_url = unescapeHTML(self._search_regex(
+        checkout_url = unescape_html(self._search_regex(
             r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
             webpage, 'checkout url', group='url', default=None))
         if checkout_url:
@@ -90,7 +90,7 @@ class UdemyIE(InfoExtractor):
                 % (course_id, combine_url(base_url, checkout_url)),
                 expected=True)

-        enroll_url = unescapeHTML(self._search_regex(
+        enroll_url = unescape_html(self._search_regex(
             r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
             webpage, 'enroll url', group='url', default=None))
         if enroll_url:
@@ -365,7 +365,7 @@ class UdemyIE(InfoExtractor):
                     self._search_regex(
                         r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
                         'setup data', default='{}', group='data'), video_id,
-                    transform_source=unescapeHTML, fatal=False)
+                    transform_source=unescape_html, fatal=False)
                 if data and isinstance(data, dict):
                     extract_formats(data.get('sources'))
                     if not duration:
@@ -377,7 +377,7 @@ class UdemyIE(InfoExtractor):
                     self._search_regex(
                         r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
                         'text tracks', default='{}', group='data'), video_id,
-                    transform_source=lambda s: js_to_json(unescapeHTML(s)),
+                    transform_source=lambda s: js_to_json(unescape_html(s)),
                     fatal=False)
                 extract_subtitles(text_tracks)

@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     unified_strdate,
-    unescapeHTML,
+    unescape_html,
 )


@@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor):

         def extract(kind):
             return [{
-                'url': unescapeHTML(item.attrib['url']),
+                'url': unescape_html(item.attrib['url']),
                 'width': int_or_none(item.get('width')),
                 'height': int_or_none(item.get('height')),
             } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
@@ -30,7 +30,7 @@ from ..utils import (
     unified_timestamp,
     unsmuggle_url,
     urlencode_postdata,
-    unescapeHTML,
+    unescape_html,
 )


@@ -450,7 +450,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         for mobj in re.finditer(
                 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
                 webpage):
-            urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+            urls.append(VimeoIE._smuggle_referrer(unescape_html(mobj.group('url')), url))
         PLAIN_EMBED_RE = (
             # Look for embedded (swf embed) Vimeo player
             r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
@@ -15,11 +15,11 @@ from ..utils import (
     ExtractorError,
     get_element_by_class,
     int_or_none,
-    orderedSet,
+    ordered_set,
     remove_start,
     str_or_none,
     str_to_int,
-    unescapeHTML,
+    unescape_html,
     unified_timestamp,
     url_or_none,
     urlencode_postdata,
@@ -422,7 +422,7 @@ class VKIE(VKBaseIE):
                 'player params'),
             video_id)['params'][0]

-        title = unescapeHTML(data['md_title'])
+        title = unescape_html(data['md_title'])

         # 2 = live
         # 3 = post live (finished live)
@@ -514,9 +514,9 @@ class VKUserVideosIE(VKBaseIE):
         entries = [
             self.url_result(
                 'http://vk.com/video' + video_id, 'VK', video_id=video_id)
-            for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
+            for video_id in ordered_set(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]

-        title = unescapeHTML(self._search_regex(
+        title = unescape_html(self._search_regex(
             r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
             webpage, 'title', default=page_id))

@@ -623,7 +623,7 @@ class VKWallPostIE(VKBaseIE):
             audios = self._parse_json(
                 self._search_regex(
                     r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
-                post_id, fatal=False, transform_source=unescapeHTML)
+                post_id, fatal=False, transform_source=unescape_html)
             if isinstance(audios, list):
                 for audio in audios:
                     a = Audio._make(audio[:6])
@@ -646,6 +646,6 @@ class VKWallPostIE(VKBaseIE):
         title = 'Wall post %s' % post_id

         return self.playlist_result(
-            orderedSet(entries), post_id,
+            ordered_set(entries), post_id,
             '%s - %s' % (uploader, title) if uploader else title,
             description)
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import unescape_html


 class VODPlatformIE(InfoExtractor):
@@ -22,7 +22,7 @@ class VODPlatformIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

-        title = unescapeHTML(self._og_search_title(webpage))
+        title = unescape_html(self._og_search_title(webpage))
         hidden_inputs = self._hidden_inputs(webpage)

         formats = self._extract_wowza_formats(
@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     parse_age_limit,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
 )


@@ -49,7 +49,7 @@ class VrakIE(InfoExtractor):
             self._search_regex(
                 r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
                 webpage, 'content', default='{}', group='content'),
-            video_id, transform_source=unescapeHTML)
+            video_id, transform_source=unescape_html)

         ref_id = content.get('refId') or self._search_regex(
             r'refId":"([^&]+)"', webpage, 'ref id')
@@ -10,7 +10,7 @@ from ..utils import (
     js_to_json,
     strip_or_none,
     try_get,
-    unescapeHTML,
+    unescape_html,
     unified_timestamp,
 )

@@ -72,7 +72,7 @@ class WatchBoxIE(InfoExtractor):
             self._search_regex(
                 r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
                 'player config', default='{}', group='data'),
-            video_id, transform_source=unescapeHTML, fatal=False)
+            video_id, transform_source=unescape_html, fatal=False)

         if not player_config:
             player_config = self._parse_json(
@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    orderedSet,
+    ordered_set,
 )


@@ -139,7 +139,7 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
             self.url_result(
                 'http://www.webofstories.com/play/%s' % video_id,
                 'WebOfStories', video_id=video_id)
-            for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
+            for video_id in ordered_set(re.findall(r'\bid=["\']td_(\d+)', webpage))
         ]

         title = self._search_regex(
@@ -7,7 +7,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
-    unescapeHTML,
+    unescape_html,
 )


@@ -48,7 +48,7 @@ class WistiaIE(InfoExtractor):
         match = re.search(
             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
         if match:
-            return unescapeHTML(match.group('url'))
+            return unescape_html(match.group('url'))

         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     try_get,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
     urljoin,
 )
@@ -123,7 +123,7 @@ class WWEPlaylistIE(WWEBaseIE):
         for mobj in re.finditer(
                 r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
             video = self._parse_json(
-                mobj.group('data'), display_id, transform_source=unescapeHTML,
+                mobj.group('data'), display_id, transform_source=unescape_html,
                 fatal=False)
             if not video:
                 continue
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     js_to_json,
-    orderedSet,
+    ordered_set,
     parse_duration,
     sanitized_Request,
     str_to_int,
@@ -167,7 +167,7 @@ class XTubeUserIE(InfoExtractor):
             if not html:
                 break

-            for video_id in orderedSet([video_id for _, video_id in re.findall(
+            for video_id in ordered_set([video_id for _, video_id in re.findall(
                     r'data-plid=(["\'])(.+?)\1', html)]):
                 entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key()))

@@ -18,7 +18,7 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
 )

 from .brightcove import (
@@ -415,7 +415,7 @@ class YahooIE(InfoExtractor):
         return {
             'id': video_id,
             'display_id': display_id,
-            'title': unescapeHTML(meta['title']),
+            'title': unescape_html(meta['title']),
             'formats': formats,
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
@@ -8,7 +8,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
 )

@@ -35,7 +35,7 @@ class YapFilesIE(InfoExtractor):

     @staticmethod
     def _extract_urls(webpage):
-        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+        return [unescape_html(mobj.group('url')) for mobj in re.finditer(
             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
             % YapFilesIE._YAPFILES_URL, webpage)]

@@ -7,7 +7,7 @@ from ..utils import (
     int_or_none,
     sanitized_Request,
     str_to_int,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     url_or_none,
 )
@@ -112,7 +112,7 @@ class YouPornIE(InfoExtractor):
                 links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))

         formats = []
-        for video_url in set(unescapeHTML(link) for link in links):
+        for video_url in set(unescape_html(link) for link in links):
             f = {
                 'url': video_url,
             }
@@ -34,7 +34,7 @@ from ..utils import (
     get_element_by_id,
     int_or_none,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     parse_codecs,
     parse_duration,
     qualities,
@@ -44,7 +44,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     try_get,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unsmuggle_url,
     uppercase_escape,
@@ -312,7 +312,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                 continue
             video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
+            video_title = unescape_html(mobj.group('title'))
             if video_title:
                 video_title = video_title.strip()
             try:
@@ -327,7 +327,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):

 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
     def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(
+        for playlist_id in ordered_set(re.findall(
                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
                 content)):
             yield self.url_result(
@@ -1423,7 +1423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _extract_urls(webpage):
         # Embedded YouTube player
         entries = [
-            unescapeHTML(mobj.group('url'))
+            unescape_html(mobj.group('url'))
             for mobj in re.finditer(r'''(?x)
             (?:
                 <iframe[^>]+?src=|
@@ -1440,7 +1440,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

         # lazyYT YouTube embed
         entries.extend(list(map(
-            unescapeHTML,
+            unescape_html,
             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

         # Wordpress "YouTube Video Importer" plugin
@@ -1730,7 +1730,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         else:
             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
             if fd_mobj:
-                video_description = unescapeHTML(fd_mobj.group(1))
+                video_description = unescape_html(fd_mobj.group(1))
             else:
                 video_description = ''

@@ -2047,7 +2047,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             ''',
             video_webpage)
         if m_music:
-            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+            video_alt_title = remove_quotes(unescape_html(m_music.group('title')))
             video_creator = clean_html(m_music.group('creator'))
         else:
             video_alt_title = video_creator = None
@@ -2064,7 +2064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
             video_webpage)
         if m_episode:
-            series = unescapeHTML(m_episode.group('series'))
+            series = unescape_html(m_episode.group('series'))
             season_number = int(m_episode.group('season'))
             episode_number = int(m_episode.group('episode'))
         else:
@@ -2082,7 +2082,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_categories = None

         video_tags = [
-            unescapeHTML(m.group('content'))
+            unescape_html(m.group('content'))
             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

         def _extract_count(count_name):
@@ -2394,7 +2394,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
             webpage = self._download_webpage(
                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
-            new_ids = orderedSet(re.findall(
+            new_ids = ordered_set(re.findall(
                 r'''(?xs)data-video-username=".*?".*?
                 href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
                 webpage))
@@ -2875,7 +2875,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             # 'recommended' feed has infinite 'load more' and each new portion spins
             # the same videos in (sometimes) slightly different order, so we'll check
             # for unicity and break when portion has no new videos
-            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
+            new_ids = list(filter(lambda video_id: video_id not in ids, ordered_set(matches)))
             if not new_ids:
                 break

@@ -9,7 +9,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     NO_DEFAULT,
-    orderedSet,
+    ordered_set,
     parse_codecs,
     qualities,
     try_get,
@@ -267,7 +267,7 @@ class ZDFChannelIE(ZDFBaseIE):

         entries = [
             self.url_result(item_url, ie=ZDFIE.ie_key())
-            for item_url in orderedSet(re.findall(
+            for item_url in ordered_set(re.findall(
                 r'data-plusbar-url=["\'](http.+?\.html)', webpage))]

         return self.playlist_result(
Some files were not shown because too many files have changed in this diff
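The rename is purely mechanical: unescapeHTML becomes unescape_html and orderedSet becomes ordered_set, with behavior unchanged at every call site above. As a minimal illustrative sketch of the semantics those call sites rely on (assuming Python 3's html module; youtube_dl.utils ships its own Python 2/3-compatible implementations, so this is not the actual code):

import html


def unescape_html(s):
    # Decode HTML entities, e.g. 'R&amp;D' -> 'R&D'. None passes
    # through unchanged, which callers rely on when feeding in
    # optional dict.get() values.
    if s is None:
        return None
    return html.unescape(s)


def ordered_set(iterable):
    # Deduplicate while preserving first-seen order, so extracted
    # playlist entries keep their on-page order (returns a list,
    # despite the name).
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

For example, ordered_set(['a', 'b', 'a']) yields ['a', 'b'], and unescape_html('&lt;b&gt;') yields '<b>'.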