From 97dae9ae07a1ecd58a412a15162826fde604db60 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 5 Oct 2013 16:12:21 +0200 Subject: [PATCH 01/21] [bliptv] Make sure video ID is a string --- youtube_dl/extractor/bliptv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 08b28c994..493504f75 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor): ext = umobj.group(1) info = { - 'id': data['item_id'], + 'id': compat_str(data['item_id']), 'url': video_url, 'uploader': data['display_name'], 'upload_date': upload_date, From e94b783c741b720ab4ee70eb7fc8764be89d63d5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 5 Oct 2013 16:38:33 +0200 Subject: [PATCH 02/21] [googleplus] Fix upload_date detection --- youtube_dl/extractor/googleplus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 8895ad289..ab12d7e93 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -41,7 +41,8 @@ class GooglePlusIE(InfoExtractor): # Extract update date upload_date = self._html_search_regex( - ['title="Timestamp">(.*?)', r'(.+?)'], + r'''(?x) + ([0-9]{4}-[0-9]{2}-[0-9]{2})''', webpage, u'upload date', fatal=False) if upload_date: # Convert timestring to a format suitable for filename From 00fcc17aeeab11ce694699bf183d33a3af75aab6 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Sat, 5 Oct 2013 15:55:58 -0400 Subject: [PATCH 03/21] add capability to suppress expected warnings in tests --- test/helper.py | 15 +++++++++++++++ test/test_dailymotion_subtitles.py | 5 +++-- test/test_youtube_subtitles.py | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/test/helper.py b/test/helper.py index a2b468b50..63f56841f 100644 --- a/test/helper.py +++ b/test/helper.py 
@@ -1,6 +1,8 @@ import io import json import os.path +import re +import types import youtube_dl.extractor from youtube_dl import YoutubeDL, YoutubeDLHandler @@ -32,6 +34,19 @@ class FakeYDL(YoutubeDL): raise Exception(s) def download(self, x): self.result.append(x) + # def expect_warning(self, regex): + # # Silence an expected warning matching a regex + # def report_warning(self, message): + # if re.match(regex, message): return + # super(FakeYDL, self).report_warning(regex) + # self.report_warning = types.MethodType(report_warning, self) + def expect_warning(self, regex): + # Silence an expected warning matching a regex + old_report_warning = self.report_warning + def report_warning(self, message): + if re.match(regex, message): return + old_report_warning(message) + self.report_warning = types.MethodType(report_warning, self) def get_testcases(): for ie in youtube_dl.extractor.gen_extractors(): diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index 83c65d57e..ed2ad311d 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -2,8 +2,6 @@ import sys import unittest -import json -import io import hashlib # Allow direct execution @@ -45,15 +43,18 @@ class TestDailymotionSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 5) def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslang'] = ['en'] subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) == 0) def test_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 
'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 168e6c66c..f9b0c1ad0 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -2,8 +2,6 @@ import sys import unittest -import json -import io import hashlib # Allow direct execution @@ -56,6 +54,7 @@ class TestYoutubeSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') def test_youtube_list_subtitles(self): + self.DL.expect_warning(u'Video doesn\'t have automatic captions') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) @@ -66,6 +65,7 @@ class TestYoutubeSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertTrue(subtitles['it'] is not None) def test_youtube_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'sAjKT8FhjI8' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True From 79cfb46d42cf0cd296acf7f0689d2ad4b2e7f971 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Sat, 5 Oct 2013 16:08:48 -0400 Subject: [PATCH 04/21] add tox configuration file for easy testing --- .gitignore | 1 + tox.ini | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 24fdb3626..7dd0ad09b 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ updates_key.pem *.mp4 *.part test/testdata +.tox diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..53b461fdb --- /dev/null +++ b/tox.ini @@ -0,0 +1,5 @@ +[tox] +envlist = py26,py27,py33 +[testenv] +deps = nose +commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test From 8932a66e49dda60bdb6ddb1447df63fea5c4f320 Mon Sep 17 00:00:00 2001 From: 
Filippo Valsorda Date: Sat, 5 Oct 2013 16:38:37 -0400 Subject: [PATCH 05/21] [fixup] remove unnecessary commented function --- test/helper.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/helper.py b/test/helper.py index 63f56841f..8e641e3cb 100644 --- a/test/helper.py +++ b/test/helper.py @@ -34,12 +34,6 @@ class FakeYDL(YoutubeDL): raise Exception(s) def download(self, x): self.result.append(x) - # def expect_warning(self, regex): - # # Silence an expected warning matching a regex - # def report_warning(self, message): - # if re.match(regex, message): return - # super(FakeYDL, self).report_warning(regex) - # self.report_warning = types.MethodType(report_warning, self) def expect_warning(self, regex): # Silence an expected warning matching a regex old_report_warning = self.report_warning From c1c9a79c49e8656f3244744e6f4e336e47a03206 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 04:27:09 +0200 Subject: [PATCH 06/21] Add basic --download-archive option Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the meantime. When --download-archive FILE is given, the extractor and ID of every download are recorded in the specified file. If it is already present, the video in question is skipped. --- youtube_dl/YoutubeDL.py | 32 ++++++++++++++ youtube_dl/__init__.py | 4 ++ youtube_dl/utils.py | 96 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..1f5f75e30 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -84,6 +85,9 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. 
+ download_archive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +313,9 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + if self.in_download_archive(info_dict): + return (u'%(title)s) has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +585,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +626,26 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..a680d7c55 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None): 
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--download-archive', metavar='FILE', + dest='download_archive', + help='Download only videos not present in the archive file. Record all downloaded videos in it.') authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'download_archive': opts.download_archive, }) if opts.verbose: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99..a463049a4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -830,3 +830,99 @@ def get_cachedir(params={}): cache_root = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) + + +# Cross-platform file locking +if sys.platform == 'win32': + import ctypes.wintypes + import msvcrt + + class OVERLAPPED(ctypes.Structure): + _fields_ = [ + ('Internal', ctypes.wintypes.LPVOID), + ('InternalHigh', ctypes.wintypes.LPVOID), + ('Offset', ctypes.wintypes.DWORD), + ('OffsetHigh', ctypes.wintypes.DWORD), + ('hEvent', ctypes.wintypes.HANDLE), + ] + + kernel32 = ctypes.windll.kernel32 + LockFileEx = kernel32.LockFileEx + LockFileEx.argtypes = [ + ctypes.wintypes.HANDLE, # hFile + ctypes.wintypes.DWORD, # dwFlags + ctypes.wintypes.DWORD, # dwReserved + ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow + ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh + ctypes.POINTER(OVERLAPPED) # Overlapped + ] + LockFileEx.restype = ctypes.wintypes.BOOL + UnlockFileEx = 
kernel32.UnlockFileEx + UnlockFileEx.argtypes = [ + ctypes.wintypes.HANDLE, # hFile + ctypes.wintypes.DWORD, # dwReserved + ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow + ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh + ctypes.POINTER(OVERLAPPED) # Overlapped + ] + UnlockFileEx.restype = ctypes.wintypes.BOOL + whole_low = 0xffffffff + whole_high = 0x7fffffff + + def _lock_file(f, exclusive): + overlapped = OVERLAPPED() + overlapped.Offset = 0 + overlapped.OffsetHigh = 0 + overlapped.hEvent = 0 + f._lock_file_overlapped_p = ctypes.pointer(overlapped) + handle = msvcrt.get_osfhandle(f.fileno()) + if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0, + whole_low, whole_high, f._lock_file_overlapped_p): + raise OSError('Locking file failed: %r' % ctypes.FormatError()) + + def _unlock_file(f): + assert f._lock_file_overlapped_p + handle = msvcrt.get_osfhandle(f.fileno()) + if not UnlockFileEx(handle, 0, + whole_low, whole_high, f._lock_file_overlapped_p): + raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) + +else: + import fcntl + + def _lock_file(f, exclusive): + fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) + + def _unlock_file(f): + fcntl.lockf(f, fcntl.LOCK_UN) + + +class locked_file(object): + def __init__(self, filename, mode, encoding=None): + assert mode in ['r', 'a', 'w'] + self.f = io.open(filename, mode, encoding=encoding) + self.mode = mode + + def __enter__(self): + exclusive = self.mode != 'r' + try: + _lock_file(self.f, exclusive) + except IOError: + self.f.close() + raise + return self + + def __exit__(self, etype, value, traceback): + try: + _unlock_file(self.f) + finally: + self.f.close() + + def __iter__(self): + return iter(self.f) + + def write(self, *args): + return self.f.write(*args) + + def read(self, *args): + return self.f.read(*args) From f4aac741d5c98c4350dda478fa4564144d99d13a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 05:47:17 +0200 Subject: [PATCH 07/21] Move try_rm 
to test helpers --- test/helper.py | 22 +++++++++++++++++++--- test/test_download.py | 23 +++++++---------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/test/helper.py b/test/helper.py index 8e641e3cb..884cf32dc 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,3 +1,4 @@ +import errno import io import json import os.path @@ -22,18 +23,33 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) + +def try_rm(filename): + """ Remove a file if it exists """ + try: + os.remove(filename) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + class FakeYDL(YoutubeDL): def __init__(self): - self.result = [] # Different instances of the downloader can't share the same dictionary # some test set the "sublang" parameter, which would break the md5 checks. - self.params = dict(parameters) - def to_screen(self, s): + params = dict(parameters) + super(FakeYDL, self).__init__(params) + self.result = [] + + def to_screen(self, s, skip_eol=None): print(s) + def trouble(self, s, tb=None): raise Exception(s) + def download(self, x): self.result.append(x) + def expect_warning(self, regex): # Silence an expected warning matching a regex old_report_warning = self.report_warning diff --git a/test/test_download.py b/test/test_download.py index 23a66254d..23d3853c4 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import errno import hashlib import io import os @@ -28,14 +27,6 @@ opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, You compat_urllib_request.install_opener(opener) socket.setdefaulttimeout(10) -def _try_rm(filename): - """ Remove a file if it exists """ - try: - os.remove(filename) - except OSError as ose: - if ose.errno != errno.ENOENT: - raise - md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class YoutubeDL(youtube_dl.YoutubeDL): @@ 
-54,7 +45,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -from helper import get_testcases +from helper import get_testcases, try_rm defs = get_testcases() with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -97,9 +88,9 @@ def generator(test_case): test_cases = test_case.get('playlist', [test_case]) for tc in test_cases: - _try_rm(tc['file']) - _try_rm(tc['file'] + '.part') - _try_rm(tc['file'] + '.info.json') + try_rm(tc['file']) + try_rm(tc['file'] + '.part') + try_rm(tc['file'] + '.info.json') try: for retry in range(1, RETRIES + 1): try: @@ -145,9 +136,9 @@ def generator(test_case): self.assertTrue(key in info_dict.keys() and info_dict[key]) finally: for tc in test_cases: - _try_rm(tc['file']) - _try_rm(tc['file'] + '.part') - _try_rm(tc['file'] + '.info.json') + try_rm(tc['file']) + try_rm(tc['file'] + '.part') + try_rm(tc['file'] + '.info.json') return test_template From 8dbe9899a985a04690e467510c94c14f3314843b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 06:06:30 +0200 Subject: [PATCH 08/21] Allow users to specify an age limit (fixes #1545) With these changes, users can now restrict what videos are downloaded by the intended audience, by specifying their age with --age-limit YEARS . Add rudimentary support in youtube, pornotube, and youporn. 
--- test/test_age_restriction.py | 53 +++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 6 ++++ youtube_dl/__init__.py | 4 +++ youtube_dl/extractor/common.py | 10 ++++++ youtube_dl/extractor/pornotube.py | 4 ++- youtube_dl/extractor/youporn.py | 4 ++- youtube_dl/extractor/youtube.py | 3 +- 7 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 test/test_age_restriction.py diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 000000000..943f9a315 --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl import YoutubeDL +from helper import try_rm + + +def _download_restricted(url, filename, age): + """ Returns true iff the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + "outtmpl": "%(id)s.%(ext)s", + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = filename + '.info.json' + try_rm(json_filename) + ydl.download([url]) + res = os.path.exists(json_filename) + try_rm(json_filename) + return res + + +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + + def test_youporn(self): + self._assert_restricted( + 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', + '505835.mp4', 2, old_age=25) + + def test_pornotube(self): + self._assert_restricted( + 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + '1689755.flv', 13) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 2503fd09b..6258c141e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -84,6 +84,8 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +311,10 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_restriction', 0): + return u'Skipping "' + title + '" because it is age restricted' return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..7a399273a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None): selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', + help='download only videos suitable for the given age', + default=None, type=int) authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': 
opts.youtube_print_sig_code, + 'age_limit': opts.age_limit, }) if opts.verbose: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 69cdcdc1b..2a5a85dc6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,7 @@ class InfoExtractor(object): view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen + age_limit: Age restriction for the video, as an integer (years) formats: A list of dictionaries for each format available, it must be ordered from worst to best quality. Potential fields: * url Mandatory. The URL of the video file @@ -318,6 +319,15 @@ class InfoExtractor(object): self._og_regex('video')], html, name, **kargs) + def _rta_search(self, html): + # See http://www.rtalabel.org/index.php?content=howtofaq#single + if re.search(r'(?ix)Added (?P[0-9\/]+) by' upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) if upload_date: upload_date = unified_strdate(upload_date) + age_limit = self._rta_search(webpage) info = {'id': video_id, 'url': video_url, @@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor): 'upload_date': upload_date, 'title': video_title, 'ext': 'flv', - 'format': 'flv'} + 'format': 'flv', + 'age_restriction': age_limit} return [info] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index c85fd4b5a..e2860ec9d 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor): req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) + age_limit = self._rta_search(webpage) # Get JSON parameters json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') @@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor): 'ext': 
extension, 'format': format, 'thumbnail': thumbnail, - 'description': video_description + 'description': video_description, + 'age_restriction': age_limit, }) if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1101011ea..9bcd035bd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'description': video_description, 'player_url': player_url, 'subtitles': video_subtitles, - 'duration': video_duration + 'duration': video_duration, + 'age_restriction': 18 if age_gate else 0, }) return results From 41e8bca4d0fa3e5284466da2497ef23e09678ccb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 07:12:47 +0200 Subject: [PATCH 09/21] [viddler] Add basic support (Fixes #1520) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/viddler.py | 64 ++++++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/viddler.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d1b7e5f99..2b054e1c9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -117,6 +117,7 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vevo import VevoIE from .vice import ViceIE +from .viddler import ViddlerIE from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py new file mode 100644 index 000000000..12c84a985 --- /dev/null +++ b/youtube_dl/extractor/viddler.py @@ -0,0 +1,64 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + + +class ViddlerIE(InfoExtractor): + _VALID_URL = 
r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)' + _TEST = { + u"url": u"http://www.viddler.com/v/43903784", + u'file': u'43903784.mp4', + u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', + u'info_dict': { + u"title": u"Video Made Easy", + u"uploader": u"viddler", + u"duration": 100.89, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + embed_url = mobj.group('domain') + u'/embed/' + video_id + webpage = self._download_webpage(embed_url, video_id) + + video_sources_code = self._search_regex( + r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') + video_sources = json.loads(video_sources_code.replace("'", '"')) + + formats = [{ + 'url': video_url, + 'format': format_id, + } for video_url, format_id in video_sources.items()] + + title = self._html_search_regex( + r"title\s*:\s*'([^']*)'", webpage, u'title') + uploader = self._html_search_regex( + r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) + duration_s = self._html_search_regex( + r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) + duration = float(duration_s) if duration_s else None + thumbnail = self._html_search_regex( + r"thumbnail\s*:\s*'([^']*)'", + webpage, u'thumbnail', fatal=False) + + info = { + '_type': 'video', + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'duration': duration, + 'formats': formats, + } + + # TODO: Remove when #980 has been merged + info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) + info.update(info['formats'][-1]) + + return info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99..e3feb12bf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -175,7 +175,7 @@ def compat_ord(c): compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; 
rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 7e5e8306fdc67d75a995f21f3316256433e2c890 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 07:13:14 +0200 Subject: [PATCH 10/21] release 2013.10.06 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e773e82da..08eda2197 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.04' +__version__ = '2013.10.06' From e484c81f0c0a6faf959037ac03b504e4794d72df Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 16:03:18 +0200 Subject: [PATCH 11/21] [generic] Clarify error messages --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 764070635..7060c6f92 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -117,7 +117,7 @@ class GenericIE(InfoExtractor): except ValueError: # since this is the last-resort InfoExtractor, if # this error is thrown, it'll be thrown here - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Failed to download URL: %s' % url) self.report_extraction(video_id) # Look for BrightCove: @@ -149,12 +149,12 @@ class GenericIE(InfoExtractor): # HTML5 video mobj = re.search(r'.*? 
Date: Sun, 6 Oct 2013 16:23:06 +0200 Subject: [PATCH 12/21] Call extracted property age_limit everywhere --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/pornotube.py | 2 +- youtube_dl/extractor/youporn.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6258c141e..9ada01bcc 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -313,7 +313,7 @@ class YoutubeDL(object): return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) age_limit = self.params.get('age_limit') if age_limit is not None: - if age_limit < info_dict.get('age_restriction', 0): + if age_limit < info_dict.get('age_limit', 0): return u'Skipping "' + title + '" because it is age restricted' return None diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 9039dff5a..5d770ec28 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -47,6 +47,6 @@ class PornotubeIE(InfoExtractor): 'title': video_title, 'ext': 'flv', 'format': 'flv', - 'age_restriction': age_limit} + 'age_limit': age_limit} return [info] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index e2860ec9d..b1f93dd1b 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -117,7 +117,7 @@ class YouPornIE(InfoExtractor): 'format': format, 'thumbnail': thumbnail, 'description': video_description, - 'age_restriction': age_limit, + 'age_limit': age_limit, }) if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9bcd035bd..b02ae2572 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1496,7 +1496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'player_url': player_url, 'subtitles': video_subtitles, 
'duration': video_duration, - 'age_restriction': 18 if age_gate else 0, + 'age_limit': 18 if age_gate else 0, }) return results From ee6c9f95e1e5cf118b0bdf6abc8376bd95bc7dcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 16:28:36 +0200 Subject: [PATCH 13/21] Remove superfluous parenthesis --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1f5f75e30..856e9ac92 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -314,7 +314,7 @@ class YoutubeDL(object): if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) if self.in_download_archive(info_dict): - return (u'%(title)s) has already been recorded in archive' + return (u'%(title)s has already been recorded in archive' % info_dict) return None From 1310bf247445148731dcfe1883b18fbf78795e9e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 16:39:35 +0200 Subject: [PATCH 14/21] [redtube] add age_limit --- youtube_dl/extractor/redtube.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index bb19b898a..365aade56 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -30,9 +30,14 @@ class RedTubeIE(InfoExtractor): r'

(.+?)

', webpage, u'title') + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + return { - 'id': video_id, - 'url': video_url, - 'ext': video_extension, - 'title': video_title, + 'id': video_id, + 'url': video_url, + 'ext': video_extension, + 'title': video_title, + 'age_limit': age_limit, } From 387ae5f30b5490bf2ffcdcb1c9e07f0845934ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 6 Oct 2013 21:56:23 +0200 Subject: [PATCH 15/21] [vimeo] Recognize urls ending in a slash (fixes #1242) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4a7d82b7a..cea29f035 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)(?:[?].*)?$' + _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ From 15870e90b0aa7fe73040936a2ef4e41cf5eed931 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 7 Oct 2013 12:21:24 +0200 Subject: [PATCH 16/21] Restore warning when user forgets to quote URL (#1396) --- youtube_dl/__init__.py | 2 ++ youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/youtube.py | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ba5206387..db4c58885 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -484,6 +484,8 @@ def _real_main(argv=None): if not ie._WORKING: 
continue desc = getattr(ie, 'IE_DESC', ie.IE_NAME) + if desc is False: + continue if hasattr(ie, 'SEARCH_KEY'): _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') _COUNTS = (u'', u'5', u'10', u'all') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2b054e1c9..c01de6b5e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -141,6 +141,7 @@ from .youtube import ( YoutubeShowIE, YoutubeSubscriptionsIE, YoutubeRecommendedIE, + YoutubeTruncatedURLIE, YoutubeWatchLaterIE, YoutubeFavouritesIE, ) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b02ae2572..35310b39f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1250,9 +1250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return url_map def _real_extract(self, url): - if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): - self._downloader.report_warning(u'Did you forget to quote the URL? 
Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).') - # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: @@ -1637,7 +1634,7 @@ class YoutubeChannelIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!watch(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' @@ -1830,3 +1827,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos') playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id') return self.url_result(playlist_id, 'YoutubePlaylist') + + +class YoutubeTruncatedURLIE(InfoExtractor): + IE_NAME = 'youtube:truncated_url' + IE_DESC = False # Do not list + _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$' + + def _real_extract(self, url): + raise ExtractorError( + u'Did you forget to quote the URL? 
Remember that & is a meta ' + u'character in most shells, so you want to put the URL in quotes, ' + u'like youtube-dl ' + u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' + u' (or simply youtube-dl BaW_jenozKc ).', + expected=True) From faa6ef6bc826c03b39db49ed5b4b76960e46970e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 7 Oct 2013 14:33:23 +0200 Subject: [PATCH 17/21] [jeuxvideo] Improve code quality (fixes #1567) --- youtube_dl/extractor/jeuxvideo.py | 37 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index ae2e37a70..6bb54b932 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -6,6 +6,7 @@ import xml.etree.ElementTree from .common import InfoExtractor + class JeuxVideoIE(InfoExtractor): _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' @@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = re.match(self._VALID_URL, url).group(1) webpage = self._download_webpage(url, title) - m_download = re.search(r'', webpage) - - xml_link = m_download.group(1) + xml_link = self._html_search_regex( + r'', + webpage, u'config URL') - id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1) + video_id = self._search_regex( + r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', + xml_link, u'video ID') - xml_config = self._download_webpage(xml_link, title, - 'Downloading XML config') + xml_config = self._download_webpage( + xml_link, title, u'Downloading XML config') config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) - info = re.search(r'(.*?)', - xml_config, re.MULTILINE|re.DOTALL).group(1) - info = json.loads(info)['versions'][0] + info_json = self._search_regex( + r'(?sm)(.*?)', + xml_config, u'JSON information') + info = json.loads(info_json)['versions'][0] 
video_url = 'http://video720.jeuxvideo.com/' + info['file'] - return {'id': id, - 'title' : config.find('titre_video').text, - 'ext' : 'mp4', - 'url' : video_url, - 'description': self._og_search_description(webpage), - 'thumbnail': config.find('image').text, - } + return { + 'id': video_id, + 'title': config.find('titre_video').text, + 'ext': 'mp4', + 'url': video_url, + 'description': self._og_search_description(webpage), + 'thumbnail': config.find('image').text, + } From 4481a754e454eebb3688f048639c21890189681b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 7 Oct 2013 14:34:19 +0200 Subject: [PATCH 18/21] release 2013.10.07 --- README.md | 3 +++ youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 14d62b189..8824daee2 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like. --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date --no-playlist download only the currently playing video + --age-limit YEARS download only videos suitable for the given age + --download-archive FILE Download only videos not present in the archive + file. Record all downloaded videos in it. ## Download Options: -r, --rate-limit LIMIT maximum download rate (e.g. 
50k or 44.6m) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 08eda2197..8b4f03308 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.06' +__version__ = '2013.10.07' From a27b9e8bd592c880e65ab6bb3e15e1f5f8727cd8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 7 Oct 2013 18:50:26 +0200 Subject: [PATCH 19/21] Move opener setup into a separate helper function --- youtube_dl/__init__.py | 54 ++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index db4c58885..9594fd892 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -36,6 +36,7 @@ __authors__ = ( __license__ = 'Public Domain' import codecs +import collections import getpass import optparse import os @@ -447,27 +448,7 @@ def _real_main(argv=None): all_urls = batchurls + args all_urls = [url.strip() for url in all_urls] - # General configuration - cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) - if opts.proxy is not None: - if opts.proxy == '': - proxies = {} - else: - proxies = {'http': opts.proxy, 'https': opts.proxy} - else: - proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) - if 'http' in proxies and 'https' not in proxies: - proxies['https'] = proxies['http'] - proxy_handler = compat_urllib_request.ProxyHandler(proxies) - https_handler = make_HTTPS_handler(opts) - opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) - # Delete the default user-agent header, which would otherwise apply in - # cases where our custom HTTP handler doesn't come into play - # (See https://github.com/rg3/youtube-dl/issues/1309 for details) - opener.addheaders =[] - compat_urllib_request.install_opener(opener) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous 
last words) + _setup_opener(jar=jar, opts=opts) extractors = gen_extractors() @@ -698,6 +679,37 @@ def _real_main(argv=None): sys.exit(retcode) + +def _setup_opener(jar=None, opts=None, timeout=300): + if opts is None: + FakeOptions = collections.namedtuple( + 'FakeOptions', ['proxy', 'no_check_certificate']) + opts = FakeOptions(proxy=None, no_check_certificate=False) + + cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) + if opts.proxy is not None: + if opts.proxy == '': + proxies = {} + else: + proxies = {'http': opts.proxy, 'https': opts.proxy} + else: + proxies = compat_urllib_request.getproxies() + # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) + if 'http' in proxies and 'https' not in proxies: + proxies['https'] = proxies['http'] + proxy_handler = compat_urllib_request.ProxyHandler(proxies) + https_handler = make_HTTPS_handler(opts) + opener = compat_urllib_request.build_opener( + https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) + # Delete the default user-agent header, which would otherwise apply in + # cases where our custom HTTP handler doesn't come into play + # (See https://github.com/rg3/youtube-dl/issues/1309 for details) + opener.addheaders = [] + compat_urllib_request.install_opener(opener) + socket.setdefaulttimeout(timeout) + return opener + + def main(argv=None): try: _real_main(argv) From b2ad967e4561f0bcb73fca3281341751471cab91 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 7 Oct 2013 19:06:36 +0200 Subject: [PATCH 20/21] Simplify test setup --- test/helper.py | 7 +------ test/test_download.py | 9 +-------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/test/helper.py b/test/helper.py index 884cf32dc..ad1b74dd3 100644 --- a/test/helper.py +++ b/test/helper.py @@ -12,12 +12,7 @@ from youtube_dl.utils import ( compat_urllib_request, ) -# General configuration (from __init__, not very elegant...) 
-jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) +youtube_dl._setup_opener(timeout=10) PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: diff --git a/test/test_download.py b/test/test_download.py index 23d3853c4..fdf59bb5c 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -19,14 +19,6 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para RETRIES = 3 -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) -socket.setdefaulttimeout(10) - md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class YoutubeDL(youtube_dl.YoutubeDL): @@ -45,6 +37,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() +import helper # Set up remaining global configuration from helper import get_testcases, try_rm defs = get_testcases() From 2ae3edb1cfa5b0afc2e6b6e2d4ea470dadf655a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 7 Oct 2013 21:10:31 +0200 Subject: [PATCH 21/21] Fix the printing of the proxy map in debug mode The proxies have to be extracted from the opener.handlers --- youtube_dl/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9594fd892..3ff78daac 100644 --- a/youtube_dl/__init__.py +++ 
b/youtube_dl/__init__.py @@ -448,7 +448,7 @@ def _real_main(argv=None): all_urls = batchurls + args all_urls = [url.strip() for url in all_urls] - _setup_opener(jar=jar, opts=opts) + opener = _setup_opener(jar=jar, opts=opts) extractors = gen_extractors() @@ -641,7 +641,12 @@ def _real_main(argv=None): except: pass write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') - write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') + + proxy_map = {} + for handler in opener.handlers: + if hasattr(handler, 'proxies'): + proxy_map.update(handler.proxies) + write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') ydl.add_default_info_extractors()