pep: function name refactorings

Riaan F Venter 2019-01-24 22:05:54 +08:00
parent fad4ceb534
commit 1c33e3bee2
105 changed files with 391 additions and 391 deletions
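The rename is mechanical: camelCase helpers in youtube_dl/utils (and the handful imported elsewhere) become PEP 8 snake_case, e.g. encodeFilename → encode_filename, encodeArgument → encode_argument, decodeOption → decode_option, unescapeHTML → unescape_html, orderedSet → ordered_set, formatSeconds → format_seconds, make_HTTPS_handler → make_https_handler. A minimal sketch of how third-party code could bridge the rename; the shim itself is illustrative and not part of this commit:

# Illustrative compatibility shim, not part of this commit: resolve each
# helper under its new snake_case name, falling back to the camelCase name
# on pre-rename versions of youtube_dl.utils.
from youtube_dl import utils as _utils

def _pick(new_name, old_name):
    # Prefer the renamed helper; fall back for pre-rename versions.
    return getattr(_utils, new_name, None) or getattr(_utils, old_name)

encode_filename = _pick('encode_filename', 'encodeFilename')
encode_argument = _pick('encode_argument', 'encodeArgument')
unescape_html = _pick('unescape_html', 'unescapeHTML')
ordered_set = _pick('ordered_set', 'orderedSet')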

View File

@@ -21,7 +21,7 @@ from youtube_dl.compat import (
     compat_urllib_request,
 )
 from youtube_dl.utils import (
-    make_HTTPS_handler,
+    make_https_handler,
     sanitized_Request,
 )
@@ -33,7 +33,7 @@ class GitHubReleaser(object):
     def __init__(self, debuglevel=0):
         self._init_github_account()
-        https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
+        https_handler = make_https_handler({}, debuglevel=debuglevel)
         self._opener = compat_urllib_request.build_opener(https_handler)

     def _init_github_account(self):

View File

@@ -13,7 +13,7 @@ from test.helper import http_server_port, try_rm
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_http_server
 from youtube_dl.downloader.http import HttpFD
-from youtube_dl.utils import encodeFilename
+from youtube_dl.utils import encode_filename
 import threading

 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -91,12 +91,12 @@ class TestHttpFD(unittest.TestCase):
         ydl = YoutubeDL(params)
         downloader = HttpFD(ydl, params)
         filename = 'testfile.mp4'
-        try_rm(encodeFilename(filename))
+        try_rm(encode_filename(filename))
         self.assertTrue(downloader.real_download(filename, {
             'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
         }))
-        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
-        try_rm(encodeFilename(filename))
+        self.assertEqual(os.path.getsize(encode_filename(filename)), TEST_SIZE)
+        try_rm(encode_filename(filename))

     def download_all(self, params):
         for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):

View File

@@ -10,7 +10,7 @@ import os
 import subprocess

 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from youtube_dl.utils import encodeArgument
+from youtube_dl.utils import encode_argument

 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -34,7 +34,7 @@ class TestExecution(unittest.TestCase):
     def test_cmdline_umlauts(self):
         p = subprocess.Popen(
-            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            [sys.executable, 'youtube_dl/__main__.py', encode_argument('ä'), '--version'],
             cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
         _, stderr = p.communicate()
         self.assertFalse(stderr)

View File

@@ -26,7 +26,7 @@ from youtube_dl.utils import (
     determine_ext,
     dict_get,
     encode_compat_str,
-    encodeFilename,
+    encode_filename,
     escape_rfc3986,
     escape_url,
     extract_attributes,
@@ -48,7 +48,7 @@ from youtube_dl.utils import (
     multipart_encode,
     ohdave_rsa_encrypt,
     OnDemandPagedList,
-    orderedSet,
+    ordered_set,
     parse_age_limit,
     parse_duration,
     parse_filesize,
@@ -71,7 +71,7 @@ from youtube_dl.utils import (
     str_to_int,
     strip_jsonp,
     timeconvert,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unified_timestamp,
     unsmuggle_url,
@@ -277,21 +277,21 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(remove_quotes('";"'), ';')

     def test_ordered_set(self):
-        self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
-        self.assertEqual(orderedSet([]), [])
-        self.assertEqual(orderedSet([1]), [1])
+        self.assertEqual(ordered_set([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
+        self.assertEqual(ordered_set([]), [])
+        self.assertEqual(ordered_set([1]), [1])
         # keep the list ordered
-        self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
+        self.assertEqual(ordered_set([135, 1, 1, 1]), [135, 1])

     def test_unescape_html(self):
-        self.assertEqual(unescapeHTML('%20;'), '%20;')
-        self.assertEqual(unescapeHTML('&#x2F;'), '/')
-        self.assertEqual(unescapeHTML('&#47;'), '/')
-        self.assertEqual(unescapeHTML('&eacute;'), 'é')
-        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
-        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
+        self.assertEqual(unescape_html('%20;'), '%20;')
+        self.assertEqual(unescape_html('&#x2F;'), '/')
+        self.assertEqual(unescape_html('&#47;'), '/')
+        self.assertEqual(unescape_html('&eacute;'), 'é')
+        self.assertEqual(unescape_html('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescape_html('&a&quot;'), '&a"')
         # HTML5 entities
-        self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
+        self.assertEqual(unescape_html('&period;&apos;'), '.\'')

     def test_date_from_str(self):
         self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
@@ -462,7 +462,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(res_data, {'a': 'b', 'c': 'd'})

     def test_shell_quote(self):
-        args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+        args = ['ffmpeg', '-i', encode_filename('ñ€ß\'.mp4')]
         self.assertEqual(
             shell_quote(args),
             """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')

View File

@@ -53,19 +53,19 @@ from .utils import (
     determine_protocol,
     DownloadError,
     encode_compat_str,
-    encodeFilename,
+    encode_filename,
     error_to_compat_str,
     expand_path,
     ExtractorError,
     format_bytes,
-    formatSeconds,
+    format_seconds,
     GeoRestrictedError,
     int_or_none,
     ISO3166Utils,
     locked_file,
-    make_HTTPS_handler,
+    make_https_handler,
     MaxDownloadsReached,
-    orderedSet,
+    ordered_set,
     PagedList,
     parse_filesize,
     PerRequestProxyHandler,
@@ -710,7 +710,7 @@ class YoutubeDL(object):
             # 'Treat' all problem characters by passing filename through preferredencoding
             # to workaround encoding issues with subprocess on python2 @ Windows
             if sys.version_info < (3, 0) and sys.platform == 'win32':
-                filename = encodeFilename(filename, True).decode(preferredencoding())
+                filename = encode_filename(filename, True).decode(preferredencoding())
             return sanitize_path(filename)
         except ValueError as err:
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
@@ -918,7 +918,7 @@ class YoutubeDL(object):
                         yield int(item)
                 else:
                     yield int(string_segment)
-            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+            playlistitems = ordered_set(iter_playlistitems(playlistitems_str))

         ie_entries = ie_result['entries']
@@ -1735,7 +1735,7 @@ class YoutubeDL(object):
         if self.params.get('forcefilename', False) and filename is not None:
             self.to_stdout(filename)
         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
-            self.to_stdout(formatSeconds(info_dict['duration']))
+            self.to_stdout(format_seconds(info_dict['duration']))
         if self.params.get('forceformat', False):
             self.to_stdout(info_dict['format'])
         if self.params.get('forcejson', False):
@@ -1758,19 +1758,19 @@ class YoutubeDL(object):
                 self.report_error('unable to create directory ' + error_to_compat_str(err))
                 return False

-        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+        if not ensure_dir_exists(sanitize_path(encode_filename(filename))):
             return

         if self.params.get('writedescription', False):
             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(descfn)):
                 self.to_screen('[info] Video description is already present')
             elif info_dict.get('description') is None:
                 self.report_warning('There\'s no description to write.')
             else:
                 try:
                     self.to_screen('[info] Writing video description to: ' + descfn)
-                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                    with io.open(encode_filename(descfn), 'w', encoding='utf-8') as descfile:
                         descfile.write(info_dict['description'])
                 except (OSError, IOError):
                     self.report_error('Cannot write description file ' + descfn)
@@ -1778,12 +1778,12 @@ class YoutubeDL(object):
         if self.params.get('writeannotations', False):
             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(annofn)):
                 self.to_screen('[info] Video annotations are already present')
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
-                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+                    with io.open(encode_filename(annofn), 'w', encoding='utf-8') as annofile:
                         annofile.write(info_dict['annotations'])
                 except (KeyError, TypeError):
                     self.report_warning('There are no annotations to write.')
@@ -1802,7 +1802,7 @@ class YoutubeDL(object):
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
@@ -1810,7 +1810,7 @@ class YoutubeDL(object):
                     try:
                         # Use newline='' to prevent conversion of newline characters
                         # See https://github.com/rg3/youtube-dl/issues/10268
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+                        with io.open(encode_filename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                             subfile.write(sub_info['data'])
                     except (OSError, IOError):
                         self.report_error('Cannot write subtitles file ' + sub_filename)
@@ -1819,7 +1819,7 @@ class YoutubeDL(object):
                     try:
                         sub_data = ie._request_webpage(
                             sub_info['url'], info_dict['id'], note=False).read()
-                        with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+                        with io.open(encode_filename(sub_filename), 'wb') as subfile:
                             subfile.write(sub_data)
                     except (ExtractorError, IOError, OSError, ValueError) as err:
                         self.report_warning('Unable to download subtitle for "%s": %s' %
@@ -1828,7 +1828,7 @@ class YoutubeDL(object):
         if self.params.get('writeinfojson', False):
             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(infofn)):
                 self.to_screen('[info] Video description metadata is already present')
             else:
                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
@@ -1889,7 +1889,7 @@ class YoutubeDL(object):
                             'Requested formats are incompatible for merge and will be merged into mkv.')
                     # Ensure filename always has a correct extension for successful merge
                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
-                    if os.path.exists(encodeFilename(filename)):
+                    if os.path.exists(encode_filename(filename)):
                         self.to_screen(
                             '[download] %s has already been downloaded and '
                             'merged' % filename)
@@ -2055,7 +2055,7 @@ class YoutubeDL(object):
             for old_filename in files_to_delete:
                 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                 try:
-                    os.remove(encodeFilename(old_filename))
+                    os.remove(encode_filename(old_filename))
                 except (IOError, OSError):
                     self.report_warning('Unable to remove downloaded original file')
@@ -2319,7 +2319,7 @@ class YoutubeDL(object):
         proxy_handler = PerRequestProxyHandler(proxies)
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
-        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+        https_handler = make_https_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
         data_handler = compat_urllib_request_DataHandler()
@@ -2378,7 +2378,7 @@ class YoutubeDL(object):
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+            if self.params.get('nooverwrites', False) and os.path.exists(encode_filename(thumb_filename)):
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
             else:
@@ -2386,7 +2386,7 @@ class YoutubeDL(object):
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
                 try:
                     uf = self.urlopen(t['url'])
-                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
+                    with open(encode_filename(thumb_filename), 'wb') as thumbf:
                         shutil.copyfileobj(uf, thumbf)
                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
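
The -710 hunk above leans on encode_filename's Python 2 behavior: it returns bytes, so the result can be decoded back through preferredencoding() on Windows. A simplified sketch of that contract, assuming the historical encodeFilename semantics (pass str through on Python 3, encode to the filesystem encoding on Python 2); the real helper also special-cases Windows subprocess arguments:

import sys

def encode_filename(s, for_subprocess=False):
    # Simplified sketch: Python 3 filesystem APIs take str directly.
    if sys.version_info >= (3, 0):
        return s
    # Python 2: hand bytes in the filesystem encoding to os.* and subprocess.
    encoding = sys.getfilesystemencoding() or 'utf-8'
    return s.encode(encoding, 'ignore')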

View File

@@ -22,7 +22,7 @@ from .compat import (
 )
 from .utils import (
     DateRange,
-    decodeOption,
+    decode_option,
     DEFAULT_OUTTMPL,
     DownloadError,
     expand_path,
@@ -375,8 +375,8 @@ def _real_main(argv=None):
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
         'subtitleslangs': opts.subtitleslangs,
-        'matchtitle': decodeOption(opts.matchtitle),
-        'rejecttitle': decodeOption(opts.rejecttitle),
+        'matchtitle': decode_option(opts.matchtitle),
+        'rejecttitle': decode_option(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
         'verbose': opts.verbose,

View File

@@ -8,8 +8,8 @@ import random
 from ..compat import compat_os_name
 from ..utils import (
-    decodeArgument,
-    encodeFilename,
+    decode_argument,
+    encode_filename,
     error_to_compat_str,
     format_bytes,
     shell_quote,
@@ -181,7 +181,7 @@ class FileDownloader(object):
     def temp_name(self, filename):
         """Returns a temporary filename for the given filename."""
         if self.params.get('nopart', False) or filename == '-' or \
-                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+                (os.path.exists(encode_filename(filename)) and not os.path.isfile(encode_filename(filename))):
             return filename
         return filename + '.part'
@@ -197,7 +197,7 @@ class FileDownloader(object):
         try:
             if old_filename == new_filename:
                 return
-            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+            os.rename(encode_filename(old_filename), encode_filename(new_filename))
         except (IOError, OSError) as err:
             self.report_error('unable to rename file: %s' % error_to_compat_str(err))
@@ -205,7 +205,7 @@ class FileDownloader(object):
         """Try to set the last-modified time of the given file."""
         if last_modified_hdr is None:
             return
-        if not os.path.isfile(encodeFilename(filename)):
+        if not os.path.isfile(encode_filename(filename)):
             return
         timestr = last_modified_hdr
         if timestr is None:
@@ -331,14 +331,14 @@ class FileDownloader(object):
         nooverwrites_and_exists = (
             self.params.get('nooverwrites', False) and
-            os.path.exists(encodeFilename(filename))
+            os.path.exists(encode_filename(filename))
         )

         if not hasattr(filename, 'write'):
             continuedl_and_exists = (
                 self.params.get('continuedl', True) and
-                os.path.isfile(encodeFilename(filename)) and
+                os.path.isfile(encode_filename(filename)) and
                 not self.params.get('nopart', False)
             )

             # Check file already present
@@ -347,7 +347,7 @@ class FileDownloader(object):
                 self._hook_progress({
                     'filename': filename,
                     'status': 'finished',
-                    'total_bytes': os.path.getsize(encodeFilename(filename)),
+                    'total_bytes': os.path.getsize(encode_filename(filename)),
                 })
                 return True
@@ -380,7 +380,7 @@ class FileDownloader(object):
         if not self.params.get('verbose', False):
             return
-        str_args = [decodeArgument(a) for a in args]
+        str_args = [decode_argument(a) for a in args]
         if exe is None:
             exe = os.path.basename(str_args[0])

View File

@@ -17,8 +17,8 @@ from ..utils import (
     cli_valueless_option,
     cli_bool_option,
     cli_configuration_args,
-    encodeFilename,
-    encodeArgument,
+    encode_filename,
+    encode_argument,
     handle_youtubedl_headers,
     check_executable,
     is_outdated_version,
@@ -49,7 +49,7 @@ class ExternalFD(FileDownloader):
                 'elapsed': time.time() - started,
             }
             if filename != '-':
-                fsize = os.path.getsize(encodeFilename(tmpfilename))
+                fsize = os.path.getsize(encode_filename(tmpfilename))
                 self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
                 self.try_rename(tmpfilename, filename)
                 status.update({
@@ -98,7 +98,7 @@ class ExternalFD(FileDownloader):
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
-        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+        cmd = [encode_argument(a) for a in self._make_cmd(tmpfilename, info_dict)]

         self._debug_cmd(cmd)
@@ -131,7 +131,7 @@ class CurlFD(ExternalFD):
         return cmd

     def _call_downloader(self, tmpfilename, info_dict):
-        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+        cmd = [encode_argument(a) for a in self._make_cmd(tmpfilename, info_dict)]

         self._debug_cmd(cmd)
@@ -311,8 +311,8 @@ class FFmpegFD(ExternalFD):
         else:
             args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]

-        args = [encodeArgument(opt) for opt in args]
-        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+        args = [encode_argument(opt) for opt in args]
+        args.append(encode_filename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
         self._debug_cmd(args)

View File

@@ -8,7 +8,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..utils import (
     error_to_compat_str,
-    encodeFilename,
+    encode_filename,
     sanitize_open,
     sanitized_Request,
 )
@@ -117,7 +117,7 @@ class FragmentFD(FileDownloader):
             if self.__do_ytdl_file(ctx):
                 self._write_ytdl_file(ctx)
             if not self.params.get('keep_fragments', False):
-                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+                os.remove(encode_filename(ctx['fragment_filename_sanitized']))
             del ctx['fragment_filename_sanitized']

     def _prepare_frag_download(self, ctx):
@@ -150,9 +150,9 @@ class FragmentFD(FileDownloader):
         resume_len = 0

         # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        if os.path.isfile(encode_filename(tmpfilename)):
             open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))
+            resume_len = os.path.getsize(encode_filename(tmpfilename))

         # Should be initialized before ytdl file check
         ctx.update({
@@ -161,7 +161,7 @@ class FragmentFD(FileDownloader):
         })

         if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            if os.path.isfile(encode_filename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
                 is_corrupt = ctx.get('ytdl_corrupt') is True
                 is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -248,7 +248,7 @@ class FragmentFD(FileDownloader):
     def _finish_frag_download(self, ctx):
         ctx['dest_stream'].close()
         if self.__do_ytdl_file(ctx):
-            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+            ytdl_filename = encode_filename(self.ytdl_filename(ctx['filename']))
             if os.path.isfile(ytdl_filename):
                 os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
@@ -257,7 +257,7 @@ class FragmentFD(FileDownloader):
             downloaded_bytes = ctx['complete_frags_downloaded_bytes']
         else:
             self.try_rename(ctx['tmpfilename'], ctx['filename'])
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = os.path.getsize(encode_filename(ctx['filename']))

         self._hook_progress({
             'downloaded_bytes': downloaded_bytes,

View File

@@ -14,7 +14,7 @@ from ..compat import (
 )
 from ..utils import (
     ContentTooShortError,
-    encodeFilename,
+    encode_filename,
     int_or_none,
     sanitize_open,
     sanitized_Request,
@@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
         if self.params.get('continuedl', True):
             # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+            if os.path.isfile(encode_filename(ctx.tmpfilename)):
                 ctx.resume_len = os.path.getsize(
-                    encodeFilename(ctx.tmpfilename))
+                    encode_filename(ctx.tmpfilename))

         ctx.is_resume = ctx.resume_len > 0
@@ -221,7 +221,7 @@ class HttpFD(FileDownloader):
                 if not to_stdout:
                     ctx.stream.close()
                 ctx.stream = None
-                ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+                ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encode_filename(ctx.tmpfilename))
                 raise RetryDownload(e)

         while True:

View File

@@ -9,8 +9,8 @@ from .common import FileDownloader
 from ..compat import compat_str
 from ..utils import (
     check_executable,
-    encodeFilename,
-    encodeArgument,
+    encode_filename,
+    encode_argument,
     get_exe_version,
 )
@@ -156,7 +156,7 @@ class RtmpFD(FileDownloader):
         if not live and continue_dl:
             args += ['--skip', '1']

-        args = [encodeArgument(a) for a in args]
+        args = [encode_argument(a) for a in args]

         self._debug_cmd(args, exe='rtmpdump')
@@ -180,15 +180,15 @@ class RtmpFD(FileDownloader):
             return False

         while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
-            prevsize = os.path.getsize(encodeFilename(tmpfilename))
+            prevsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
             time.sleep(5.0)  # This seems to be needed
             args = basic_args + ['--resume']
             if retval == RD_FAILED:
                 args += ['--skip', '1']
-            args = [encodeArgument(a) for a in args]
+            args = [encode_argument(a) for a in args]
             retval = run_rtmpdump(args)
-            cursize = os.path.getsize(encodeFilename(tmpfilename))
+            cursize = os.path.getsize(encode_filename(tmpfilename))
             if prevsize == cursize and retval == RD_FAILED:
                 break
             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@@ -197,7 +197,7 @@ class RtmpFD(FileDownloader):
                 retval = RD_SUCCESS
                 break
         if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            fsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
             self._hook_progress({

View File

@@ -6,7 +6,7 @@ import subprocess
 from .common import FileDownloader
 from ..utils import (
     check_executable,
-    encodeFilename,
+    encode_filename,
 )
@@ -31,7 +31,7 @@ class RtspFD(FileDownloader):
         retval = subprocess.call(args)
         if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            fsize = os.path.getsize(encode_filename(tmpfilename))
             self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({

View File

@@ -13,7 +13,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
 )
@@ -131,7 +131,7 @@ class ABCIViewIE(InfoExtractor):
         video_id = self._match_id(url)
         video_params = self._download_json(
             'https://iview.abc.net.au/api/programs/' + video_id, video_id)
-        title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+        title = unescape_html(video_params.get('title') or video_params['seriesTitle'])
         stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))

         house_number = video_params.get('episodeHouseNumber') or video_id
@@ -179,7 +179,7 @@ class ABCIViewIE(InfoExtractor):
             'thumbnail': video_params.get('thumbnail'),
             'duration': int_or_none(video_params.get('eventDuration')),
             'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
-            'series': unescapeHTML(video_params.get('seriesTitle')),
+            'series': unescape_html(video_params.get('seriesTitle')),
             'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
             'season_number': int_or_none(self._search_regex(
                 r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),

View File

@@ -11,7 +11,7 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
-    unescapeHTML,
+    unescape_html,
     urlencode_postdata,
     unified_timestamp,
     ExtractorError,
@@ -1385,7 +1385,7 @@ class AdobePassIE(InfoExtractor):
             if not redirect_url:
                 return None
             if url:
-                redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+                redirect_url = compat_urlparse.urljoin(url, unescape_html(redirect_url))
             return redirect_url

         mvpd_headers = {
@@ -1520,7 +1520,7 @@ class AdobePassIE(InfoExtractor):
                         self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                         count += 1
                         continue
-                    authn_token = unescapeHTML(xml_text(session, 'authnToken'))
+                    authn_token = unescape_html(xml_text(session, 'authnToken'))
                     requestor_info['authn_token'] = authn_token
                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
@@ -1543,7 +1543,7 @@ class AdobePassIE(InfoExtractor):
                         continue
                     if '<error' in authorize:
                         raise ExtractorError(xml_text(authorize, 'details'), expected=True)
-                    authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
+                    authz_token = unescape_html(xml_text(authorize, 'authzToken'))
                     requestor_info[guid] = authz_token
                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)

View File

@@ -6,7 +6,7 @@ from .theplatform import ThePlatformIE
 from ..utils import (
     smuggle_url,
     update_url_query,
-    unescapeHTML,
+    unescape_html,
     extract_attributes,
     get_element_by_attribute,
 )
@@ -219,7 +219,7 @@ class HistoryTopicIE(AENetworksBaseIE):
         if video_display_id:
             webpage = self._download_webpage(url, video_display_id)
             release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
-            release_url = unescapeHTML(release_url)
+            release_url = unescape_html(release_url)

             return self.theplatform_url_result(
                 release_url, video_id, {

View File

@@ -17,7 +17,7 @@ from ..utils import (
     intlist_to_bytes,
     int_or_none,
     strip_jsonp,
-    unescapeHTML,
+    unescape_html,
     unsmuggle_url,
 )
@@ -272,7 +272,7 @@ class AnvatoIE(InfoExtractor):
         entries = []
         for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
             anvplayer_data = ie._parse_json(
-                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+                mobj.group('anvp'), video_id, transform_source=unescape_html,
                 fatal=False)
             if not anvplayer_data:
                 continue

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )
@@ -27,7 +27,7 @@ class ATVAtIE(InfoExtractor):
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        video_data = self._parse_json(unescapeHTML(self._search_regex(
+        video_data = self._parse_json(unescape_html(self._search_regex(
             [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
              r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
             webpage, 'player data', group='json')),

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import unescape_html


 class BaiduVideoIE(InfoExtractor):
@@ -43,7 +43,7 @@ class BaiduVideoIE(InfoExtractor):
             'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
         playlist_title = playlist_detail['title']
-        playlist_description = unescapeHTML(playlist_detail.get('intro'))
+        playlist_description = unescape_html(playlist_detail.get('intro'))

         episodes_detail = self._call_api(
             'xqsingle', category, playlist_id, 'Download episodes JSON metadata')

View File

@@ -17,7 +17,7 @@ from ..utils import (
     parse_filesize,
     str_or_none,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     unified_strdate,
     unified_timestamp,
@@ -141,7 +141,7 @@ class BandcampIE(InfoExtractor):
             self._search_regex(
                 r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
                 'blob', group='blob'),
-            track_id, transform_source=unescapeHTML)
+            track_id, transform_source=unescape_html)

         info = try_get(
             blob, (lambda x: x['digital_items'][0],
@@ -359,7 +359,7 @@ class BandcampWeeklyIE(InfoExtractor):
             self._search_regex(
                 r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
                 'blob', group='blob'),
-            video_id, transform_source=unescapeHTML)
+            video_id, transform_source=unescape_html)

         show = blob['bcw_show']

View File

@@ -16,7 +16,7 @@ from ..utils import (
     parse_duration,
     parse_iso8601,
     try_get,
-    unescapeHTML,
+    unescape_html,
     urlencode_postdata,
     urljoin,
 )
@@ -895,7 +895,7 @@ class BBCIE(BBCCoUkIE):
         if data_playables:
             for _, data_playable_json in data_playables:
                 data_playable = self._parse_json(
-                    unescapeHTML(data_playable_json), playlist_id, fatal=False)
+                    unescape_html(data_playable_json), playlist_id, fatal=False)
                 if not data_playable:
                     continue
                 settings = data_playable.get('settings', {})

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    unescape_html,
 )
@@ -32,8 +32,8 @@ class BildIE(InfoExtractor):
         return {
             'id': video_id,
-            'title': unescapeHTML(video_data['title']).strip(),
-            'description': unescapeHTML(video_data.get('description')),
+            'title': unescape_html(video_data['title']).strip(),
+            'description': unescape_html(video_data.get('description')),
             'url': video_data['clipList'][0]['srces'][0]['src'],
             'thumbnail': video_data.get('poster'),
             'duration': int_or_none(video_data.get('durationSec')),

View File

@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
-    orderedSet,
+    ordered_set,
     urlencode_postdata,
 )
@@ -54,7 +54,7 @@ class BitChuteIE(InfoExtractor):
         formats = [
             {'url': format_url}
-            for format_url in orderedSet(format_urls)]
+            for format_url in ordered_set(format_urls)]
         self._check_formats(formats, video_id)
         self._sort_formats(formats)

View File

@@ -27,7 +27,7 @@ from ..utils import (
     js_to_json,
     int_or_none,
     parse_iso8601,
-    unescapeHTML,
+    unescape_html,
     unsmuggle_url,
     update_url_query,
     clean_html,
@@ -259,7 +259,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                     content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
                 ''', webpage)
         if url_m:
-            url = unescapeHTML(url_m.group('url'))
+            url = unescape_html(url_m.group('url'))
             # Some sites don't add it, we can't download with this url, for example:
             # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
             if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:

View File

@@ -17,7 +17,7 @@ from ..utils import (
     xpath_element,
     xpath_with_ns,
     find_xpath_attr,
-    orderedSet,
+    ordered_set,
     parse_duration,
     parse_iso8601,
     parse_age_limit,
@@ -145,7 +145,7 @@ class CBCIE(InfoExtractor):
             media_ids.extend(re.findall(media_id_re, webpage))
         entries.extend([
             self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-            for media_id in orderedSet(media_ids)])
+            for media_id in ordered_set(media_ids)])
         return self.playlist_result(
             entries, display_id, strip_or_none(title),
             self._og_search_description(webpage))

View File

@@ -12,7 +12,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     urlencode_postdata,
     USER_AGENTS,
@@ -277,7 +277,7 @@ class CeskaTelevizePoradyIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

-        data_url = update_url_query(unescapeHTML(self._search_regex(
+        data_url = update_url_query(unescape_html(self._search_regex(
             (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
              r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
             webpage, 'iframe player url', group='url')), query={

View File

@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     qualities,
-    unescapeHTML,
+    unescape_html,
 )
@@ -108,7 +108,7 @@ class Channel9IE(InfoExtractor):
         episode_data = self._search_regex(
             r"data-episode='([^']+)'", webpage, 'episode data', default=None)
         if episode_data:
-            episode_data = self._parse_json(unescapeHTML(
+            episode_data = self._parse_json(unescape_html(
                 episode_data), content_path)
             content_id = episode_data['contentId']
             is_session = '/Sessions(' in episode_data['api']

View File

@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
-    unescapeHTML,
+    unescape_html,
 )
@@ -36,7 +36,7 @@ class CJSWIE(InfoExtractor):
         webpage = self._download_webpage(url, episode_id)

-        title = unescapeHTML(self._search_regex(
+        title = unescape_html(self._search_regex(
             (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
              r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
             webpage, 'title', group='title'))

View File

@@ -54,7 +54,7 @@ from ..utils import (
     js_to_json,
     JSON_LD_RE,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     parse_codecs,
     parse_duration,
     parse_iso8601,
@@ -62,7 +62,7 @@ from ..utils import (
     RegexNotFoundError,
     sanitized_Request,
     sanitize_filename,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unified_timestamp,
     update_Request,
@@ -932,7 +932,7 @@ class InfoExtractor(object):
         return video_info

     def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
-        urls = orderedSet(
+        urls = ordered_set(
             self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
             for m in matches)
         return self.playlist_result(
@@ -1083,7 +1083,7 @@ class InfoExtractor(object):
         escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
         if escaped is None:
             return None
-        return unescapeHTML(escaped)
+        return unescape_html(escaped)

     def _og_search_thumbnail(self, html, **kargs):
         return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
@@ -1220,8 +1220,8 @@ class InfoExtractor(object):
             assert e['@type'] == 'VideoObject'
             info.update({
                 'url': url_or_none(e.get('contentUrl')),
-                'title': unescapeHTML(e.get('name')),
-                'description': unescapeHTML(e.get('description')),
+                'title': unescape_html(e.get('name')),
+                'description': unescape_html(e.get('description')),
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
@@ -1239,11 +1239,11 @@ class InfoExtractor(object):
             if expected_type is not None and expected_type != item_type:
                 return info
             if item_type in ('TVEpisode', 'Episode'):
-                episode_name = unescapeHTML(e.get('name'))
+                episode_name = unescape_html(e.get('name'))
                 info.update({
                     'episode': episode_name,
                     'episode_number': int_or_none(e.get('episodeNumber')),
-                    'description': unescapeHTML(e.get('description')),
+                    'description': unescape_html(e.get('description')),
                 })
                 if not info.get('title') and episode_name:
                     info['title'] = episode_name
@@ -1252,19 +1252,19 @@ class InfoExtractor(object):
                     info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                 part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                 if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
-                    info['series'] = unescapeHTML(part_of_series.get('name'))
+                    info['series'] = unescape_html(part_of_series.get('name'))
             elif item_type == 'Movie':
                 info.update({
-                    'title': unescapeHTML(e.get('name')),
-                    'description': unescapeHTML(e.get('description')),
+                    'title': unescape_html(e.get('name')),
+                    'description': unescape_html(e.get('description')),
                     'duration': parse_duration(e.get('duration')),
                     'timestamp': unified_timestamp(e.get('dateCreated')),
                 })
             elif item_type in ('Article', 'NewsArticle'):
                 info.update({
                     'timestamp': parse_iso8601(e.get('datePublished')),
-                    'title': unescapeHTML(e.get('headline')),
-                    'description': unescapeHTML(e.get('articleBody')),
+                    'title': unescape_html(e.get('headline')),
+                    'description': unescape_html(e.get('articleBody')),
                 })
             elif item_type == 'VideoObject':
                 extract_video_object(e)
@@ -2628,7 +2628,7 @@ class InfoExtractor(object):
             entry = {
                 'id': this_video_id,
-                'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
+                'title': unescape_html(video_data['title'] if require_title else video_data.get('title')),
                 'description': video_data.get('description'),
                 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
                 'timestamp': int_or_none(video_data.get('pubdate')),
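
The hunks above make extractor/common.py the heaviest consumer of the renamed unescape_html, applying it to every human-readable JSON-LD field. Its contract is pinned by the test_utils assertions earlier: entities are unescaped, while stray text like '%20;' and invalid numeric references pass through untouched. A hedged stand-in built on the Python 3 stdlib; youtube-dl ships its own entity table for Python 2 support:

import re
from html import unescape as _std_unescape

def unescape_html(s):
    # Hedged stand-in, Python 3 only; see the test_utils assertions above.
    if s is None:
        return None

    def _transform(m):
        unescaped = _std_unescape(m.group(0))
        # Keep invalid numeric references literal instead of U+FFFD,
        # matching unescape_html('&#2013266066;') == '&#2013266066;'.
        return m.group(0) if unescaped == '\ufffd' else unescaped

    # Only touch well-formed entities, so '%20;' and '&a' pass through.
    return re.sub(r'&[^&;]+;', _transform, s)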

View File

@@ -14,7 +14,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     parse_iso8601,
 )
@@ -111,7 +111,7 @@ class CondeNastIE(InfoExtractor):
         base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
         m_paths = re.finditer(
             r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
-        paths = orderedSet(m.group(1) for m in m_paths)
+        paths = ordered_set(m.group(1) for m in m_paths)
         build_url = lambda path: compat_urlparse.urljoin(base_url, path)
         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
         return self.playlist_result(entries, playlist_title=title)

View File

@@ -11,7 +11,7 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
 )
 from .senateisvp import SenateISVPIE
 from .ustream import UstreamIE
@@ -154,12 +154,12 @@ class CSpanIE(InfoExtractor):
                     for quality in f.get('qualities', []):
                         formats.append({
                             'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
-                            'url': unescapeHTML(get_text_attr(quality, 'file')),
+                            'url': unescape_html(get_text_attr(quality, 'file')),
                             'height': int_or_none(get_text_attr(quality, 'height')),
                             'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
                         })
                     if not formats:
-                        path = unescapeHTML(get_text_attr(f, 'path'))
+                        path = unescape_html(get_text_attr(f, 'path'))
                         if not path:
                             continue
                         formats = self._extract_m3u8_formats(

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import orderedSet
+from ..utils import ordered_set


 class CTVNewsIE(InfoExtractor):
@@ -63,6 +63,6 @@ class CTVNewsIE(InfoExtractor):
                 'ot': 'example.AjaxPageLayout.ot',
                 'maxItemsPerPage': 1000000,
             })
-            entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
+            entries = [ninecninemedia_url_result(clip_id) for clip_id in ordered_set(
                 re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
         return self.playlist_result(entries, page_id)

View File

@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     determine_protocol,
     try_get,
-    unescapeHTML,
+    unescape_html,
 )
@@ -40,7 +40,7 @@ class DailyMailIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         video_data = self._parse_json(self._search_regex(
             r"data-opts='({.+?})'", webpage, 'video data'), video_id)
-        title = unescapeHTML(video_data['title'])
+        title = unescape_html(video_data['title'])

         sources_url = (try_get(
             video_data,
@@ -78,7 +78,7 @@ class DailyMailIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'description': unescapeHTML(video_data.get('descr')),
+            'description': unescape_html(video_data.get('descr')),
             'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
             'formats': formats,
         }

View File

@@ -23,7 +23,7 @@ from ..utils import (
     sanitized_Request,
     str_to_int,
     try_get,
-    unescapeHTML,
+    unescape_html,
     update_url_query,
     url_or_none,
     urlencode_postdata,
@@ -140,7 +140,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         # Look for embedded Dailymotion player
         matches = re.findall(
             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
-        return list(map(lambda m: unescapeHTML(m[1]), matches))
+        return list(map(lambda m: unescape_html(m[1]), matches))

     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -481,7 +481,7 @@ class DailymotionUserIE(DailymotionBaseInfoExtractor):
         user = mobj.group('user')
         webpage = self._download_webpage(
             'https://www.dailymotion.com/user/%s' % user, user)
-        full_user = unescapeHTML(self._html_search_regex(
+        full_user = unescape_html(self._html_search_regex(
             r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
             webpage, 'user'))

View File

@@ -16,7 +16,7 @@ from ..utils import (
int_or_none,
str_to_int,
xpath_text,
-unescapeHTML,
+unescape_html,
)
@@ -162,7 +162,7 @@ class DaumClipIE(InfoExtractor):
'_type': 'url_transparent',
'id': video_id,
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
-'title': unescapeHTML(clip_info['title']),
+'title': unescape_html(clip_info['title']),
'thumbnail': clip_info.get('thumb_url'),
'description': clip_info.get('contents'),
'duration': int_or_none(clip_info.get('duration')),

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
-orderedSet,
+ordered_set,
)
@@ -70,7 +70,7 @@ class DeezerPlaylistIE(InfoExtractor):
}]
self._sort_formats(formats)
artists = ', '.join(
-orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
+ordered_set(a['ART_NAME'] for a in s['ARTISTS']))
entries.append({
'id': s['SNG_ID'],
'duration': int_or_none(s.get('DURATION')),

View File

@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
parse_age_limit,
remove_end,
-unescapeHTML,
+unescape_html,
url_or_none,
)
@@ -158,7 +158,7 @@ class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
data = self._parse_json(
mobj.group('json'), display_id,
-transform_source=unescapeHTML, fatal=False)
+transform_source=unescape_html, fatal=False)
if not isinstance(data, dict) or data.get('type') != 'episode':
continue
episode_url = data.get('socialUrl')

View File

@@ -8,7 +8,7 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
-unescapeHTML,
+unescape_html,
unified_strdate,
urljoin,
)
@@ -105,7 +105,7 @@ class DouyuTVIE(InfoExtractor):
'aid': 'pcclient'
})['data']['live_url']
-title = self._live_title(unescapeHTML(room['room_name']))
+title = self._live_title(unescape_html(room['room_name']))
description = room.get('show_details')
thumbnail = room.get('room_src')
uploader = room.get('nickname')

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
from ..utils import (
js_to_json,
parse_duration,
-unescapeHTML,
+unescape_html,
)
@@ -41,7 +41,7 @@ class DRBonanzaIE(InfoExtractor):
r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
display_id, transform_source=js_to_json)
-title = unescapeHTML(asset['AssetTitle']).strip()
+title = unescape_html(asset['AssetTitle']).strip()
def extract(field):
return self._search_regex(

View File

@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
-unescapeHTML,
+unescape_html,
)
@@ -99,7 +99,7 @@ class DVTVIE(InfoExtractor):
data.update(self._parse_json(
live_js, video_id, transform_source=js_to_json))
-title = unescapeHTML(data['title'])
+title = unescape_html(data['title'])
formats = []
for video in data['sources']:

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
-unescapeHTML
+unescape_html
)
@@ -77,7 +77,7 @@ class EroProfileIE(InfoExtractor):
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
-video_url = unescapeHTML(self._search_regex(
+video_url = unescape_html(self._search_regex(
r'<source src="([^"]+)', webpage, 'video url'))
title = self._html_search_regex(
r'Title:</th><td>([^<]+)</td>', webpage, 'title')

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
-orderedSet,
+ordered_set,
parse_duration,
qualities,
unified_strdate,
@@ -56,7 +56,7 @@ class EuropaIE(InfoExtractor):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
preferred_lang = query.get('sitelang', ('en', ))[0]
-preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
+preferred_langs = ordered_set((preferred_lang, 'en', 'int'))
title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs)

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
-unescapeHTML,
+unescape_html,
unified_timestamp,
)
@@ -61,7 +61,7 @@ class ExpressenIE(InfoExtractor):
self._search_regex(
r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
webpage, 'info', group='value'),
-display_id, transform_source=unescapeHTML)
+display_id, transform_source=unescape_html)
info = extract_data('video-tracking-info')
video_id = info['videoId']
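
Many hunks in this commit pass unescape_html as transform_source to _parse_json because the JSON blob is stored inside an HTML attribute and is therefore entity-escaped. A standalone sketch of the same pattern; the helper name here is illustrative, not from the codebase:

    import html
    import json

    def parse_attribute_json(attr_value):
        # Attribute values escape quotes as &quot;, so unescape first,
        # then parse; this mirrors transform_source=unescape_html.
        return json.loads(html.unescape(attr_value))

For example, parse_attribute_json('{&quot;videoId&quot;: 123}') returns {'videoId': 123}.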

View File

@@ -7,7 +7,7 @@ from ..compat import (
compat_urllib_parse_unquote,
)
from ..utils import (
-unescapeHTML,
+unescape_html,
url_basename,
dict_get,
)
@@ -51,7 +51,7 @@ class GameSpotIE(OnceIE):
webpage = self._download_webpage(url, page_id)
data_video_json = self._search_regex(
r'data-video=["\'](.*?)["\']', webpage, 'data video')
-data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
+data_video = self._parse_json(unescape_html(data_video_json), page_id)
streams = data_video['videoStreams']
manifest_url = None
@@ -111,7 +111,7 @@ class GameSpotIE(OnceIE):
onceux_json = self._search_regex(
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
if onceux_json:
-onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
+onceux_url = self._parse_json(unescape_html(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),

View File

@@ -25,10 +25,10 @@ from ..utils import (
KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext,
-orderedSet,
+ordered_set,
sanitized_Request,
smuggle_url,
-unescapeHTML,
+unescape_html,
unified_strdate,
unsmuggle_url,
UnsupportedError,
@@ -2486,7 +2486,7 @@ class GenericIE(InfoExtractor):
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
if m:
playlists = re.findall(
-r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+r'list\[\]=/playlist/([^/]+)/', unescape_html(m.group('url')))
if playlists:
return self.playlist_from_matches(
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
@@ -2515,7 +2515,7 @@ class GenericIE(InfoExtractor):
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
-burl = unescapeHTML(mobj.group(1))
+burl = unescape_html(mobj.group(1))
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
@@ -2631,7 +2631,7 @@ class GenericIE(InfoExtractor):
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
if matches:
return self.playlist_from_matches(
-matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
+matches, video_id, video_title, getter=unescape_html, ie='FunnyOrDie')
# Look for BBC iPlayer embed
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
@@ -2727,7 +2727,7 @@ class GenericIE(InfoExtractor):
# Look for embedded soundcloud player
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
if soundcloud_urls:
-return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescape_html, ie=SoundcloudIE.ie_key())
# Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage)
@@ -2918,7 +2918,7 @@ class GenericIE(InfoExtractor):
webpage)
if mobj is not None:
return self.url_result(
-self._proto_relative_url(unescapeHTML(mobj.group(1))),
+self._proto_relative_url(unescape_html(mobj.group(1))),
'AdobeTVVideo')
# Look for Vine embeds
@@ -2927,7 +2927,7 @@ class GenericIE(InfoExtractor):
webpage)
if mobj is not None:
return self.url_result(
-self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+self._proto_relative_url(unescape_html(mobj.group(1))), 'Vine')
# Look for VODPlatform embeds
mobj = re.search(
@@ -2935,7 +2935,7 @@ class GenericIE(InfoExtractor):
webpage)
if mobj is not None:
return self.url_result(
-self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+self._proto_relative_url(unescape_html(mobj.group('url'))), 'VODPlatform')
# Look for Mangomolo embeds
mobj = re.search(
@@ -2947,7 +2947,7 @@ class GenericIE(InfoExtractor):
if mobj is not None:
info = {
'_type': 'url_transparent',
-'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+'url': self._proto_relative_url(unescape_html(mobj.group('url'))),
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
@@ -3298,7 +3298,7 @@ class GenericIE(InfoExtractor):
refresh_header = refresh_header.decode('iso-8859-1')
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
-new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+new_url = compat_urlparse.urljoin(url, unescape_html(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
return {
@@ -3320,8 +3320,8 @@ class GenericIE(InfoExtractor):
raise UnsupportedError(url)
entries = []
-for video_url in orderedSet(found):
-video_url = unescapeHTML(video_url)
+for video_url in ordered_set(found):
+video_url = unescape_html(video_url)
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))

View File

@@ -8,7 +8,7 @@ from ..utils import (
determine_ext,
int_or_none,
qualities,
-unescapeHTML,
+unescape_html,
)
@@ -39,7 +39,7 @@ class GiantBombIE(InfoExtractor):
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
-video = json.loads(unescapeHTML(self._search_regex(
+video = json.loads(unescape_html(self._search_regex(
r'data-video="([^"]+)"', webpage, 'data-video')))
duration = int_or_none(video.get('lengthSeconds'))

View File

@@ -16,7 +16,7 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
-orderedSet,
+ordered_set,
str_or_none,
)
@@ -218,7 +218,7 @@ class GloboArticleIE(InfoExtractor):
video_ids.extend(re.findall(video_regex, webpage))
entries = [
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
-for video_id in orderedSet(video_ids)]
+for video_id in ordered_set(video_ids)]
title = self._og_search_title(webpage, fatal=False)
description = self._html_search_meta('description', webpage)
return self.playlist_result(entries, display_id, title, description)

View File

@@ -5,7 +5,7 @@ from ..utils import (
find_xpath_attr,
int_or_none,
js_to_json,
-unescapeHTML,
+unescape_html,
determine_ext,
)
@@ -82,8 +82,8 @@ class HowStuffWorksIE(InfoExtractor):
return {
'id': '%s' % video_id,
'display_id': display_id,
-'title': unescapeHTML(clip_info['clip_title']),
-'description': unescapeHTML(clip_info.get('caption')),
+'title': unescape_html(clip_info['clip_title']),
+'description': unescape_html(clip_info.get('caption')),
'thumbnail': clip_info.get('video_still_url'),
'duration': int_or_none(clip_info.get('duration')),
'formats': formats,

View File

@@ -18,7 +18,7 @@ from ..utils import (
encode_data_uri,
ExtractorError,
int_or_none,
-orderedSet,
+ordered_set,
parse_iso8601,
str_or_none,
url_basename,
@@ -243,7 +243,7 @@ class LePlaylistIE(InfoExtractor):
page = self._download_webpage(url, playlist_id)
# Currently old domain names are still used in playlists
-media_ids = orderedSet(re.findall(
+media_ids = ordered_set(re.findall(
r'<a[^>]+href="http://www\.letv\.com/ptv/vplay/(\d+)\.html', page))
entries = [self.url_result(LeIE._URL_TEMPLATE % media_id, ie='Le')
for media_id in media_ids]

View File

@@ -6,7 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
-unescapeHTML,
+unescape_html,
parse_duration,
get_element_by_class,
)
@@ -68,7 +68,7 @@ class LEGOIE(InfoExtractor):
default='http://www.lego.com/%s/mediaplayer/video/' % locale))
player_url = base_url + video_id
player_webpage = self._download_webpage(player_url, video_id)
-video_data = self._parse_json(unescapeHTML(self._search_regex(
+video_data = self._parse_json(unescape_html(self._search_regex(
r"video='([^']+)'", player_webpage, 'video data')), video_id)
progressive_base = self._search_regex(
r'data-video-progressive-url="([^"]+)"',

View File

@@ -13,7 +13,7 @@ from ..utils import (
xpath_attr,
xpath_with_ns,
xpath_text,
-orderedSet,
+ordered_set,
update_url_query,
int_or_none,
float_or_none,
@@ -305,7 +305,7 @@ class LivestreamOriginalIE(InfoExtractor):
def _extract_folder(self, url, folder_id):
webpage = self._download_webpage(url, folder_id)
-paths = orderedSet(re.findall(
+paths = ordered_set(re.findall(
r'''(?x)(?:
<li\s+class="folder">\s*<a\s+href="|
<a\s+href="(?=https?://livestre\.am/)

View File

@@ -6,7 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
-orderedSet,
+ordered_set,
parse_duration,
try_get,
)
@@ -118,7 +118,7 @@ class MarkizaPageIE(InfoExtractor):
entries = [
self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
-for video_id in orderedSet(re.findall(
+for video_id in ordered_set(re.findall(
r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
webpage))]

View File

@@ -13,7 +13,7 @@ from ..utils import (
ExtractorError,
float_or_none,
mimetype2ext,
-unescapeHTML,
+unescape_html,
unsmuggle_url,
url_or_none,
urljoin,
@@ -107,7 +107,7 @@ class MediasiteIE(InfoExtractor):
@staticmethod
def _extract_urls(webpage):
return [
-unescapeHTML(mobj.group('url'))
+unescape_html(mobj.group('url'))
for mobj in re.finditer(
r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
webpage)]

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from .pladform import PladformIE
from ..utils import (
-unescapeHTML,
+unescape_html,
int_or_none,
ExtractorError,
)
@@ -46,7 +46,7 @@ class METAIE(InfoExtractor):
json_str = ''
for i in range(0, len(st_html5), 3):
json_str += '&#x0%s;' % st_html5[i:i + 3]
-uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
+uppod_data = self._parse_json(unescape_html(json_str), video_id)
error = uppod_data.get('customnotfound')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

View File

@@ -8,7 +8,7 @@ from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
InAdvancePagedList,
-orderedSet,
+ordered_set,
str_to_int,
unified_strdate,
)
@@ -169,7 +169,7 @@ class MotherlessGroupIE(InfoExtractor):
self.url_result(
compat_urlparse.urljoin(base, '/' + entry_id),
ie=MotherlessIE.ie_key(), video_id=entry_id)
-for entry_id in orderedSet(re.findall(
+for entry_id in ordered_set(re.findall(
r'data-codename=["\']([A-Z0-9]+)', webpage))]
return entries

View File

@@ -2,7 +2,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
-unescapeHTML,
+unescape_html,
parse_duration,
)
@@ -36,8 +36,8 @@ class MovingImageIE(InfoExtractor):
r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
webpage, 'title', fatal=fatal)
-title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
-description = unescapeHTML(search_field('Description'))
+title = unescape_html(search_field('Title', fatal=True)).strip('()[]')
+description = unescape_html(search_field('Description'))
duration = parse_duration(search_field('Running time'))
thumbnail = self._search_regex(
r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)

View File

@@ -9,7 +9,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
-unescapeHTML,
+unescape_html,
)
@@ -53,10 +53,10 @@ class MSNIE(InfoExtractor):
self._search_regex(
r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
webpage, 'video data', default='{}', group='data'),
-display_id, transform_source=unescapeHTML)
+display_id, transform_source=unescape_html)
if not video:
-error = unescapeHTML(self._search_regex(
+error = unescape_html(self._search_regex(
r'data-error=(["\'])(?P<error>.+?)\1',
webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

View File

@@ -18,7 +18,7 @@ from ..utils import (
strip_or_none,
timeconvert,
try_get,
-unescapeHTML,
+unescape_html,
update_url_query,
url_basename,
xpath_text,
@@ -59,7 +59,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page')
-metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
+metrics_url = unescape_html(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
req = HEADRequest(metrics_url)
response = self._request_webpage(req, mtvn_id, 'Resolving url')
url = response.geturl()

View File

@@ -5,7 +5,7 @@ import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
-from ..utils import unescapeHTML
+from ..utils import unescape_html
class NintendoIE(InfoExtractor):
@@ -43,4 +43,4 @@ class NintendoIE(InfoExtractor):
webpage)]
return self.playlist_result(
-entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False)))
+entries, page_id, unescape_html(self._og_search_title(webpage, fatal=False)))

View File

@@ -12,7 +12,7 @@ from ..utils import (
ExtractorError,
fix_xml_ampersands,
int_or_none,
-orderedSet,
+ordered_set,
parse_duration,
qualities,
strip_jsonp,
@@ -546,7 +546,7 @@ class NPOPlaylistBaseIE(NPOIE):
entries = [
self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
-for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
+for video_id in ordered_set(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
]
playlist_title = self._html_search_regex(

View File

@@ -13,7 +13,7 @@ from ..utils import (
unified_strdate,
int_or_none,
qualities,
-unescapeHTML,
+unescape_html,
urlencode_postdata,
)
@@ -137,7 +137,7 @@ class OdnoklassnikiIE(InfoExtractor):
raise ExtractorError(error, expected=True)
player = self._parse_json(
-unescapeHTML(self._search_regex(
+unescape_html(self._search_regex(
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
webpage, 'player', group='player')),
video_id)

View File

@@ -15,7 +15,7 @@ from ..compat import (
from ..utils import (
check_executable,
determine_ext,
-encodeArgument,
+encode_argument,
ExtractorError,
get_element_by_id,
get_exe_version,
@@ -233,13 +233,13 @@ class PhantomJSwrapper(object):
out, err = p.communicate()
if p.returncode != 0:
raise ExtractorError(
-'Executing JS failed\n:' + encodeArgument(err))
+'Executing JS failed\n:' + encode_argument(err))
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
self._load_cookies()
-return (html, encodeArgument(out))
+return (html, encode_argument(out))
class OpenloadIE(InfoExtractor):
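
encode_argument (previously encodeArgument) exists because subprocess arguments on Python 2 had to be byte strings in the filesystem encoding, while Python 3 accepts text directly. A rough sketch of that behaviour, not the exact utils code:

    import sys

    def encode_argument(arg):
        # Python 3: pass text through unchanged.
        # Python 2: encode to bytes so subprocess accepts the argument.
        if sys.version_info[0] >= 3:
            return arg
        return arg.encode(sys.getfilesystemencoding() or 'utf-8')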

View File

@@ -7,7 +7,7 @@ from ..compat import compat_urlparse
from ..utils import (
get_element_by_attribute,
qualities,
-unescapeHTML,
+unescape_html,
)
@@ -66,7 +66,7 @@ class OraTVIE(InfoExtractor):
'id': self._search_regex(
r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id),
'display_id': display_id,
-'title': unescapeHTML(self._og_search_title(webpage)),
+'title': unescape_html(self._og_search_title(webpage)),
'description': get_element_by_attribute(
'class', 'video_txt_decription', webpage),
'thumbnail': self._proto_relative_url(self._search_regex(

View File

@@ -10,10 +10,10 @@ from ..utils import (
float_or_none,
HEADRequest,
int_or_none,
-orderedSet,
+ordered_set,
remove_end,
strip_jsonp,
-unescapeHTML,
+unescape_html,
unified_strdate,
url_or_none,
)
@@ -67,7 +67,7 @@ class ORFTVthekIE(InfoExtractor):
self._search_regex(
r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
webpage, 'playlist', group='json'),
-playlist_id, transform_source=unescapeHTML)['playlist']['videos']
+playlist_id, transform_source=unescape_html)['playlist']['videos']
entries = []
for sd in data_jsb:
@@ -357,7 +357,7 @@ class ORFFM4StoryIE(InfoExtractor):
webpage = self._download_webpage(url, story_id)
entries = []
-all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+all_ids = ordered_set(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
for idx, video_id in enumerate(all_ids):
data = self._download_json(
'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,

View File

@@ -11,7 +11,7 @@ from ..utils import (
int_or_none,
float_or_none,
js_to_json,
-orderedSet,
+ordered_set,
strip_jsonp,
strip_or_none,
unified_strdate,
@@ -458,7 +458,7 @@ class PBSIE(InfoExtractor):
r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
)
for p in MULTI_PART_REGEXES:
-tabbed_videos = orderedSet(re.findall(p, webpage))
+tabbed_videos = ordered_set(re.findall(p, webpage))
if tabbed_videos:
return tabbed_videos, presumptive_id, upload_date, description

View File

@@ -6,7 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
-unescapeHTML,
+unescape_html,
)
@@ -129,7 +129,7 @@ class PeriscopeUserIE(PeriscopeBaseIE):
webpage = self._download_webpage(url, user_name)
data_store = self._parse_json(
-unescapeHTML(self._search_regex(
+unescape_html(self._search_regex(
r'data-store=(["\'])(?P<data>.+?)\1',
webpage, 'data store', default='{}', group='data')),
user_name)

View File

@@ -9,7 +9,7 @@ from ..utils import (
ExtractorError,
dict_get,
int_or_none,
-unescapeHTML,
+unescape_html,
parse_iso8601,
)
@@ -103,7 +103,7 @@ class PikselIE(InfoExtractor):
formats.append({
'format_id': '-'.join(format_id),
-'url': unescapeHTML(http_url),
+'url': unescape_html(http_url),
'vbr': vbr,
'abr': abr,
'width': int_or_none(asset_file.get('videoWidth')),

View File

@@ -15,7 +15,7 @@ from ..utils import (
ExtractorError,
int_or_none,
js_to_json,
-orderedSet,
+ordered_set,
remove_quotes,
str_to_int,
url_or_none,
@@ -320,7 +320,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
self.url_result(
'http://www.%s/%s' % (host, video_url),
PornHubIE.ie_key(), video_title=title)
-for video_url, title in orderedSet(re.findall(
+for video_url, title in ordered_set(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
container))
]

View File

@@ -10,7 +10,7 @@ from ..utils import (
clean_html,
ExtractorError,
strip_jsonp,
-unescapeHTML,
+unescape_html,
)
@@ -365,5 +365,5 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
for song in cdlist['songlist']]
list_name = cdlist.get('dissname')
-list_description = clean_html(unescapeHTML(cdlist.get('desc')))
+list_description = clean_html(unescape_html(cdlist.get('desc')))
return self.playlist_result(entries, list_id, list_name, list_description)

View File

@@ -17,7 +17,7 @@ from ..utils import (
parse_duration,
strip_or_none,
try_get,
-unescapeHTML,
+unescape_html,
unified_strdate,
unified_timestamp,
update_url_query,
@@ -272,7 +272,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
title = self._html_search_meta(
('programma', 'nomeProgramma'), webpage, 'title')
-description = unescapeHTML(self._html_search_meta(
+description = unescape_html(self._html_search_meta(
('description', 'og:description'), webpage, 'description'))
entries = []

View File

@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
merge_dicts,
try_get,
-unescapeHTML,
+unescape_html,
unified_timestamp,
urljoin,
)
@@ -99,7 +99,7 @@ class RayWenderlichIE(InfoExtractor):
self._search_regex(
r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
'data collection', default='{}', group='data'),
-display_id, transform_source=unescapeHTML, fatal=False)
+display_id, transform_source=unescape_html, fatal=False)
video_id = self._extract_video_id(
data, lesson_id) or self._search_regex(
r'/videos/(\d+)/', thumbnail, 'video id')

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
from ..utils import (
js_to_json,
int_or_none,
-unescapeHTML,
+unescape_html,
)
@@ -34,7 +34,7 @@ class ReutersIE(InfoExtractor):
def get_json_value(key, fatal=False):
return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
-title = unescapeHTML(get_json_value('title', fatal=True))
+title = unescape_html(get_json_value('title', fatal=True))
mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
mas_data = self._download_json(

View File

@@ -8,7 +8,7 @@ from ..compat import compat_str
from ..utils import (
int_or_none,
parse_iso8601,
-unescapeHTML,
+unescape_html,
qualities,
)
@@ -59,8 +59,8 @@ class Revision3EmbedIE(InfoExtractor):
return {
'id': playlist_id,
-'title': unescapeHTML(video_data['title']),
-'description': unescapeHTML(video_data.get('summary')),
+'title': unescape_html(video_data['title']),
+'description': unescape_html(video_data.get('summary')),
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
@@ -120,8 +120,8 @@ class Revision3IE(InfoExtractor):
info = {
'id': page_id,
'display_id': display_id,
-'title': unescapeHTML(page_data['name']),
-'description': unescapeHTML(page_data.get('summary')),
+'title': unescape_html(page_data['name']),
+'description': unescape_html(page_data.get('summary')),
'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
'author': page_data.get('author'),
'uploader': show_data.get('name'),

View File

@@ -8,7 +8,7 @@ from ..utils import (
ExtractorError,
int_or_none,
strip_or_none,
-unescapeHTML,
+unescape_html,
urlencode_postdata,
)
@@ -92,7 +92,7 @@ class RoosterTeethIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
-episode = strip_or_none(unescapeHTML(self._search_regex(
+episode = strip_or_none(unescape_html(self._search_regex(
(r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
default=None, group='title')))

View File

@@ -10,7 +10,7 @@ from ..utils import (
parse_iso8601,
str_or_none,
try_get,
-unescapeHTML,
+unescape_html,
url_or_none,
ExtractorError,
)
@@ -48,8 +48,8 @@ class RteBaseIE(InfoExtractor):
continue
if not info_dict:
-title = unescapeHTML(show['title'])
-description = unescapeHTML(show.get('description'))
+title = unescape_html(show['title'])
+description = unescape_html(show.get('description'))
thumbnail = show.get('thumbnail')
duration = float_or_none(show.get('duration'), 1000)
timestamp = parse_iso8601(show.get('published'))

View File

@@ -9,7 +9,7 @@ from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
-unescapeHTML,
+unescape_html,
determine_ext,
)
@@ -226,5 +226,5 @@ class RTSIE(SRGSSRIE):
'view_count': int_or_none(info.get('plays')),
'uploader': info.get('programName'),
'timestamp': parse_iso8601(info.get('broadcast_date')),
-'thumbnail': unescapeHTML(info.get('preview_image_url')),
+'thumbnail': unescape_html(info.get('preview_image_url')),
}

View File

@@ -10,7 +10,7 @@ from ..utils import (
update_url_query,
int_or_none,
determine_protocol,
-unescapeHTML,
+unescape_html,
)
@@ -95,7 +95,7 @@ class SendtoNewsIE(InfoExtractor):
})
info_dict.update({
'title': video['S_headLine'].strip(),
-'description': unescapeHTML(video.get('S_fullStory')),
+'description': unescape_html(video.get('S_fullStory')),
'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),

View File

@@ -10,7 +10,7 @@ from ..utils import (
xpath_attr,
xpath_text,
xpath_element,
-unescapeHTML,
+unescape_html,
unified_timestamp,
)
@@ -70,7 +70,7 @@ class SpringboardPlatformIE(InfoExtractor):
content = xpath_element(
item, './{http://search.yahoo.com/mrss/}content', 'content',
fatal=True)
-title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
+title = unescape_html(xpath_text(item, './title', 'title', fatal=True))
video_url = content.attrib['url']
@@ -84,7 +84,7 @@ class SpringboardPlatformIE(InfoExtractor):
width = int_or_none(content.get('width'))
height = int_or_none(content.get('height'))
-description = unescapeHTML(xpath_text(
+description = unescape_html(xpath_text(
item, './description', 'description'))
thumbnail = xpath_attr(
item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',

View File

@@ -5,8 +5,8 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
-orderedSet,
-unescapeHTML,
+ordered_set,
+unescape_html,
)
@@ -66,9 +66,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
r'(?s)<description>([^<]+)</description>',
coursepage, 'description', fatal=False)
-links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
+links = ordered_set(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
info['entries'] = [self.url_result(
-'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+'http://openclassroom.stanford.edu/MainFolder/%s' % unescape_html(l)
) for l in links]
return info
else: # Root page
@@ -84,8 +84,8 @@ class StanfordOpenClassroomIE(InfoExtractor):
rootpage = self._download_webpage(rootURL, info['id'],
errnote='Unable to download course info page')
-links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
+links = ordered_set(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
info['entries'] = [self.url_result(
-'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+'http://openclassroom.stanford.edu/MainFolder/%s' % unescape_html(l)
) for l in links]
return info

View File

@@ -7,7 +7,7 @@ from ..utils import (
determine_ext,
int_or_none,
js_to_json,
-unescapeHTML,
+unescape_html,
)
@@ -59,7 +59,7 @@ class StitcherIE(InfoExtractor):
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
display_id)['config']['episode']
-title = unescapeHTML(episode['title'])
+title = unescape_html(episode['title'])
formats = [{
'url': episode[episode_key],
'ext': determine_ext(episode[episode_key]) or 'mp3',

View File

@@ -12,7 +12,7 @@ from ..utils import (
determine_ext,
dict_get,
int_or_none,
-orderedSet,
+ordered_set,
strip_or_none,
try_get,
urljoin,
@@ -363,7 +363,7 @@ class SVTPageIE(InfoExtractor):
entries = [
self.url_result(
'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
-for video_id in orderedSet(re.findall(
+for video_id in ordered_set(re.findall(
r'data-video-id=["\'](\d+)', webpage))]
title = strip_or_none(self._og_search_title(webpage, default=None))

View File

@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
parse_duration,
str_to_int,
-unescapeHTML,
+unescape_html,
xpath_text,
)
@@ -102,7 +102,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
def extract_video_url(vl):
# Any URL modification now results in HTTP Error 403: Forbidden
-return unescapeHTML(vl.text)
+return unescape_html(vl.text)
video_link = cfg_xml.find('./videoLink')
if video_link is not None:

View File

@@ -4,7 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
-unescapeHTML,
+unescape_html,
)
@@ -46,7 +46,7 @@ class TVN24IE(InfoExtractor):
self._search_regex(
r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
name, group='json', fatal=fatal) or '{}',
-video_id, transform_source=unescapeHTML, fatal=fatal)
+video_id, transform_source=unescape_html, fatal=fatal)
quality_data = extract_json('data-quality', 'formats')

View File

@@ -6,7 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
-unescapeHTML,
+unescape_html,
url_or_none,
)
@@ -97,7 +97,7 @@ class TVNetIE(InfoExtractor):
else:
is_live = None
-data_file = unescapeHTML(self._search_regex(
+data_file = unescape_html(self._search_regex(
r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
'data file', group='url'))
@@ -125,7 +125,7 @@ class TVNetIE(InfoExtractor):
})
thumbnail = self._og_search_thumbnail(
-webpage, default=None) or unescapeHTML(
+webpage, default=None) or unescape_html(
self._search_regex(
r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
'thumbnail', default=None, group='url'))

View File

@@ -19,7 +19,7 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
-orderedSet,
+ordered_set,
parse_duration,
parse_iso8601,
qualities,
@@ -389,7 +389,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
break
offset += limit
return self.playlist_result(
-[self._make_url_result(entry) for entry in orderedSet(entries)],
+[self._make_url_result(entry) for entry in ordered_set(entries)],
channel_id, channel_name)
def _make_url_result(self, url):

View File

@@ -19,7 +19,7 @@ from ..utils import (
js_to_json,
sanitized_Request,
try_get,
-unescapeHTML,
+unescape_html,
url_or_none,
urlencode_postdata,
)
@@ -68,7 +68,7 @@ class UdemyIE(InfoExtractor):
def _extract_course_info(self, webpage, video_id):
course = self._parse_json(
-unescapeHTML(self._search_regex(
+unescape_html(self._search_regex(
r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
webpage, 'course', default='{}')),
video_id, fatal=False) or {}
@@ -80,7 +80,7 @@ class UdemyIE(InfoExtractor):
def combine_url(base_url, url):
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
-checkout_url = unescapeHTML(self._search_regex(
+checkout_url = unescape_html(self._search_regex(
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
webpage, 'checkout url', group='url', default=None))
if checkout_url:
@@ -90,7 +90,7 @@ class UdemyIE(InfoExtractor):
% (course_id, combine_url(base_url, checkout_url)),
expected=True)
-enroll_url = unescapeHTML(self._search_regex(
+enroll_url = unescape_html(self._search_regex(
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
webpage, 'enroll url', group='url', default=None))
if enroll_url:
@@ -365,7 +365,7 @@ class UdemyIE(InfoExtractor):
self._search_regex(
r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
'setup data', default='{}', group='data'), video_id,
-transform_source=unescapeHTML, fatal=False)
+transform_source=unescape_html, fatal=False)
if data and isinstance(data, dict):
extract_formats(data.get('sources'))
if not duration:
@@ -377,7 +377,7 @@ class UdemyIE(InfoExtractor):
self._search_regex(
r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
'text tracks', default='{}', group='data'), video_id,
-transform_source=lambda s: js_to_json(unescapeHTML(s)),
+transform_source=lambda s: js_to_json(unescape_html(s)),
fatal=False)
extract_subtitles(text_tracks)

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
-unescapeHTML,
+unescape_html,
)
@@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor):
def extract(kind):
return [{
-'url': unescapeHTML(item.attrib['url']),
+'url': unescape_html(item.attrib['url']),
'width': int_or_none(item.get('width')),
'height': int_or_none(item.get('height')),
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]

View File

@@ -30,7 +30,7 @@ from ..utils import (
unified_timestamp,
unsmuggle_url,
urlencode_postdata,
-unescapeHTML,
+unescape_html,
)
@@ -450,7 +450,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
-urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+urls.append(VimeoIE._smuggle_referrer(unescape_html(mobj.group('url')), url))
PLAIN_EMBED_RE = (
# Look for embedded (swf embed) Vimeo player
r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',

View File

@@ -15,11 +15,11 @@ from ..utils import (
ExtractorError,
get_element_by_class,
int_or_none,
-orderedSet,
+ordered_set,
remove_start,
str_or_none,
str_to_int,
-unescapeHTML,
+unescape_html,
unified_timestamp,
url_or_none,
urlencode_postdata,
@@ -422,7 +422,7 @@ class VKIE(VKBaseIE):
'player params'),
video_id)['params'][0]
-title = unescapeHTML(data['md_title'])
+title = unescape_html(data['md_title'])
# 2 = live
# 3 = post live (finished live)
@@ -514,9 +514,9 @@ class VKUserVideosIE(VKBaseIE):
entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
-for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
+for video_id in ordered_set(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
-title = unescapeHTML(self._search_regex(
+title = unescape_html(self._search_regex(
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
webpage, 'title', default=page_id))
@@ -623,7 +623,7 @@ class VKWallPostIE(VKBaseIE):
audios = self._parse_json(
self._search_regex(
r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
-post_id, fatal=False, transform_source=unescapeHTML)
+post_id, fatal=False, transform_source=unescape_html)
if isinstance(audios, list):
for audio in audios:
a = Audio._make(audio[:6])
@@ -646,6 +646,6 @@ class VKWallPostIE(VKBaseIE):
title = 'Wall post %s' % post_id
return self.playlist_result(
-orderedSet(entries), post_id,
+ordered_set(entries), post_id,
'%s - %s' % (uploader, title) if uploader else title,
description)

View File

@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import unescape_html
 
 
 class VODPlatformIE(InfoExtractor):
@@ -22,7 +22,7 @@ class VODPlatformIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = unescapeHTML(self._og_search_title(webpage))
+        title = unescape_html(self._og_search_title(webpage))
         hidden_inputs = self._hidden_inputs(webpage)
 
         formats = self._extract_wowza_formats(


@@ -9,7 +9,7 @@ from ..utils import (
     int_or_none,
     parse_age_limit,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
 )
@@ -49,7 +49,7 @@ class VrakIE(InfoExtractor):
             self._search_regex(
                 r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
                 webpage, 'content', default='{}', group='content'),
-            video_id, transform_source=unescapeHTML)
+            video_id, transform_source=unescape_html)
 
         ref_id = content.get('refId') or self._search_regex(
             r'refId&quot;:&quot;([^&]+)&quot;', webpage, 'ref id')


@@ -10,7 +10,7 @@ from ..utils import (
     js_to_json,
     strip_or_none,
     try_get,
-    unescapeHTML,
+    unescape_html,
     unified_timestamp,
 )
@@ -72,7 +72,7 @@ class WatchBoxIE(InfoExtractor):
             self._search_regex(
                 r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
                 'player config', default='{}', group='data'),
-            video_id, transform_source=unescapeHTML, fatal=False)
+            video_id, transform_source=unescape_html, fatal=False)
 
         if not player_config:
             player_config = self._parse_json(


@@ -6,7 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    orderedSet,
+    ordered_set,
 )
@@ -139,7 +139,7 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
             self.url_result(
                 'http://www.webofstories.com/play/%s' % video_id,
                 'WebOfStories', video_id=video_id)
-            for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
+            for video_id in ordered_set(re.findall(r'\bid=["\']td_(\d+)', webpage))
         ]
 
         title = self._search_regex(


@@ -7,7 +7,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     float_or_none,
-    unescapeHTML,
+    unescape_html,
 )
@@ -48,7 +48,7 @@ class WistiaIE(InfoExtractor):
         match = re.search(
             r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
         if match:
-            return unescapeHTML(match.group('url'))
+            return unescape_html(match.group('url'))
 
         match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
         if match:


@@ -6,7 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     try_get,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
     urljoin,
 )
@@ -123,7 +123,7 @@ class WWEPlaylistIE(WWEBaseIE):
         for mobj in re.finditer(
                 r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
             video = self._parse_json(
-                mobj.group('data'), display_id, transform_source=unescapeHTML,
+                mobj.group('data'), display_id, transform_source=unescape_html,
                 fatal=False)
             if not video:
                 continue


@@ -7,7 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     js_to_json,
-    orderedSet,
+    ordered_set,
     parse_duration,
     sanitized_Request,
     str_to_int,
@@ -167,7 +167,7 @@ class XTubeUserIE(InfoExtractor):
             if not html:
                 break
 
-            for video_id in orderedSet([video_id for _, video_id in re.findall(
+            for video_id in ordered_set([video_id for _, video_id in re.findall(
                     r'data-plid=(["\'])(.+?)\1', html)]):
                 entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key()))


@@ -18,7 +18,7 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     smuggle_url,
-    unescapeHTML,
+    unescape_html,
 )
 
 from .brightcove import (
@@ -415,7 +415,7 @@ class YahooIE(InfoExtractor):
         return {
             'id': video_id,
             'display_id': display_id,
-            'title': unescapeHTML(meta['title']),
+            'title': unescape_html(meta['title']),
             'formats': formats,
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),


@@ -8,7 +8,7 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     qualities,
-    unescapeHTML,
+    unescape_html,
     url_or_none,
 )
@@ -35,7 +35,7 @@ class YapFilesIE(InfoExtractor):
     @staticmethod
     def _extract_urls(webpage):
-        return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
+        return [unescape_html(mobj.group('url')) for mobj in re.finditer(
             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
             % YapFilesIE._YAPFILES_URL, webpage)]


@@ -7,7 +7,7 @@ from ..utils import (
     int_or_none,
     sanitized_Request,
     str_to_int,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     url_or_none,
 )
@@ -112,7 +112,7 @@ class YouPornIE(InfoExtractor):
                 links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
 
         formats = []
-        for video_url in set(unescapeHTML(link) for link in links):
+        for video_url in set(unescape_html(link) for link in links):
             f = {
                 'url': video_url,
             }


@@ -34,7 +34,7 @@ from ..utils import (
     get_element_by_id,
     int_or_none,
     mimetype2ext,
-    orderedSet,
+    ordered_set,
     parse_codecs,
     parse_duration,
     qualities,
@@ -44,7 +44,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     try_get,
-    unescapeHTML,
+    unescape_html,
     unified_strdate,
     unsmuggle_url,
     uppercase_escape,
@@ -312,7 +312,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                 continue
             video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
+            video_title = unescape_html(mobj.group('title'))
             if video_title:
                 video_title = video_title.strip()
             try:
@@ -327,7 +327,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
     def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(
+        for playlist_id in ordered_set(re.findall(
                 r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
                 content)):
             yield self.url_result(
yield self.url_result( yield self.url_result(
@ -1423,7 +1423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_urls(webpage): def _extract_urls(webpage):
# Embedded YouTube player # Embedded YouTube player
entries = [ entries = [
unescapeHTML(mobj.group('url')) unescape_html(mobj.group('url'))
for mobj in re.finditer(r'''(?x) for mobj in re.finditer(r'''(?x)
(?: (?:
<iframe[^>]+?src=| <iframe[^>]+?src=|
@@ -1440,7 +1440,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # lazyYT YouTube embed
         entries.extend(list(map(
-            unescapeHTML,
+            unescape_html,
             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
 
         # Wordpress "YouTube Video Importer" plugin
@@ -1730,7 +1730,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         else:
             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
             if fd_mobj:
-                video_description = unescapeHTML(fd_mobj.group(1))
+                video_description = unescape_html(fd_mobj.group(1))
             else:
                 video_description = ''
@@ -2047,7 +2047,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             ''',
             video_webpage)
         if m_music:
-            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+            video_alt_title = remove_quotes(unescape_html(m_music.group('title')))
             video_creator = clean_html(m_music.group('creator'))
         else:
             video_alt_title = video_creator = None
@@ -2064,7 +2064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
             video_webpage)
         if m_episode:
-            series = unescapeHTML(m_episode.group('series'))
+            series = unescape_html(m_episode.group('series'))
             season_number = int(m_episode.group('season'))
             episode_number = int(m_episode.group('episode'))
         else:
@@ -2082,7 +2082,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_categories = None
 
         video_tags = [
-            unescapeHTML(m.group('content'))
+            unescape_html(m.group('content'))
             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
 
         def _extract_count(count_name):
@@ -2394,7 +2394,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
             webpage = self._download_webpage(
                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
-            new_ids = orderedSet(re.findall(
+            new_ids = ordered_set(re.findall(
                 r'''(?xs)data-video-username=".*?".*?
                     href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                 webpage))
@@ -2875,7 +2875,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             # 'recommended' feed has infinite 'load more' and each new portion spins
             # the same videos in (sometimes) slightly different order, so we'll check
             # for unicity and break when portion has no new videos
-            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
+            new_ids = list(filter(lambda video_id: video_id not in ids, ordered_set(matches)))
             if not new_ids:
                 break


@@ -9,7 +9,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     NO_DEFAULT,
-    orderedSet,
+    ordered_set,
     parse_codecs,
     qualities,
     try_get,
@@ -267,7 +267,7 @@ class ZDFChannelIE(ZDFBaseIE):
         entries = [
             self.url_result(item_url, ie=ZDFIE.ie_key())
-            for item_url in orderedSet(re.findall(
+            for item_url in ordered_set(re.findall(
                 r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
 
         return self.playlist_result(

Some files were not shown because too many files have changed in this diff.
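
For orientation, a minimal sketch of what the two helpers renamed most often in this diff do. The behavior is paraphrased on the assumption that the rename is purely mechanical; the real youtube_dl.utils implementations differ in detail (Python 2 compat shims and a custom HTML-entity table rather than the stdlib html module used here):

    import html

    def ordered_set(iterable):
        # De-duplicate while preserving first-seen order
        # (formerly orderedSet).
        res = []
        for el in iterable:
            if el not in res:
                res.append(el)
        return res

    def unescape_html(s):
        # Decode HTML entities, e.g. '&amp;' -> '&'; None passes through
        # (formerly unescapeHTML; stdlib unescape used here for brevity).
        if s is None:
            return None
        return html.unescape(s)

    assert ordered_set(['a', 'b', 'a', 'c']) == ['a', 'b', 'c']
    assert unescape_html('S1 &amp; E2') == 'S1 & E2'

Call sites are untouched apart from the name, which is why every hunk above is a one-line substitution.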