From 03359e9864bfb925f577fa5b16c3ef22884127aa Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Sun, 3 Aug 2014 07:34:04 -0700 Subject: [PATCH 0001/6219] Added --sleep-interval option --- youtube_dl/__init__.py | 8 ++++++++ youtube_dl/downloader/common.py | 3 +++ youtube_dl/utils.py | 3 +++ 3 files changed, 14 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 429630ce5..2bd5ec33b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -351,6 +351,8 @@ def parseOpts(overrideArguments=None): downloader.add_option('-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + downloader.add_option('--sleep-interval', + dest='sleepinterval', metavar='SLEEPINTERVAL', help='number of seconds to sleep between downloads (default is %default)', default="0") downloader.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) downloader.add_option('--buffer-size', @@ -671,6 +673,11 @@ def _real_main(argv=None): if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.sleepinterval is not None: + try: + opts.sleepinterval = abs(int(opts.sleepinterval)) + except ValueError: + parser.error(u'invalid sleep interval specified') if opts.min_filesize is not None: numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) if numeric_limit is None: @@ -767,6 +774,7 @@ def _real_main(argv=None): 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, + 'sleepinterval': opts.sleepinterval, 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'buffersize': opts.buffersize, diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 917f3450e..8e0e386bf 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -278,6 +278,9 @@ class FileDownloader(object): """Download to a filename using the info from info_dict Return True on success and False otherwise """ + sleep_interval = self.params.get('sleepinterval', 0) + self.to_screen(u'[download] Sleeping %d seconds...' %sleep_interval) + time.sleep(sleep_interval) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e40b367c2..d199d26d2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,6 +6,7 @@ import codecs import contextlib import ctypes import datetime +import time import email.utils import errno import getpass @@ -747,6 +748,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] + #print("sleeping\n") + #time.sleep(1) return req def http_response(self, req, resp): From 2f61fe4cccc1ef4186943f4eed2e89f8fe2e2c23 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Sun, 3 Aug 2014 07:38:04 -0700 Subject: [PATCH 0002/6219] Removed unneccesary changes to utils.py --- youtube_dl/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d199d26d2..e40b367c2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,7 +6,6 @@ import codecs import contextlib import ctypes import datetime -import time import email.utils import errno import getpass @@ -748,8 +747,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] - #print("sleeping\n") - #time.sleep(1) return req def http_response(self, req, resp): From a42c9215983c4d62d1c000c9dede6e0850dbb5e4 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Wed, 13 Aug 2014 04:38:40 -0700 Subject: [PATCH 0003/6219] Removed sleep and sleep output when interval is zero --- youtube_dl/downloader/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 48e829deb..c1da065b5 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -279,8 +279,9 @@ class FileDownloader(object): Return True on success and False otherwise """ sleep_interval = self.params.get('sleepinterval', 0) - self.to_screen(u'[download] Sleeping %d seconds...' %sleep_interval) - time.sleep(sleep_interval) + if sleep_interval > 0: + self.to_screen(u'[download] Sleeping %d seconds...' %sleep_interval) + time.sleep(sleep_interval) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) From 4231235cdaa18d050f3119a80ac409138fb8e8bd Mon Sep 17 00:00:00 2001 From: h-collector Date: Sun, 6 Jul 2014 01:42:41 +0200 Subject: [PATCH 0004/6219] Fix issues with fc2 Fix issues #2912 and #3171 --- youtube_dl/extractor/fc2.py | 49 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c663a0f81..ecfb233f9 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -7,14 +7,16 @@ import hashlib from .common import InfoExtractor from ..utils import ( ExtractorError, + compat_urllib_parse, compat_urllib_request, compat_urlparse, ) class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/((?P[^/]+)/)?content/(?P[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/((?P[^/]+)/)?(a/)?content/(?P[^/]+)' IE_NAME = 'fc2' + _NETRC_MACHINE = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', 'md5': 'a6ebe8ebe0396518689d963774a54eb7', @@ -25,17 +27,53 @@ class FC2IE(InfoExtractor): }, } + #def _real_initialize(self): + # self._login() + + def _login(self): + (username, password) = self._get_login_info() + if (username is None) or (password is None): + self._downloader.report_warning('unable to log in: will be downloading in non authorized mode') + return False + + # Log in + login_form_strs = { + 'email': username, + 'password': password, + 'done': 'video', + 'Submit': ' Login ', + } + + # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode + # chokes on unicode + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) + login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') + request = compat_urllib_request.Request( + 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) + + login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') + if 'mode=redirect&login=done' not in login_results: + self._downloader.report_warning('unable to log in: bad username or password') + return False + + # this is also needed + login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done') + redir_res = self._download_webpage(login_redir, None, note='Login redirect', errnote='Something is not right') + + return True + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) - refer = url.replace('/content/', '/a/content/') + refer = (url if '/a/content/' in url else url.replace('/content/', '/a/content/')); mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() info_url = ( @@ -47,7 +85,12 @@ class FC2IE(InfoExtractor): info = compat_urlparse.parse_qs(info_webpage) if 'err_code' in info: - raise ExtractorError('Error code: %s' % info['err_code'][0]) + #raise ExtractorError('Error code: %s' % info['err_code'][0]) + # most of the time we can still download wideo even if err_code is 403 or 602 + print 'Error code was: %s... but still trying' % info['err_code'][0] + + if 'filepath' not in info: + raise ExtractorError('No file path for download. Maybe not logged?') video_url = info['filepath'][0] + '?mid=' + info['mid'][0] title_info = info.get('title') From 40b1cbafacea338e51d43fa78438eaf21a1bcbcd Mon Sep 17 00:00:00 2001 From: h-collector Date: Sun, 6 Jul 2014 01:48:07 +0200 Subject: [PATCH 0005/6219] Update fc2.py --- youtube_dl/extractor/fc2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index ecfb233f9..0933485e0 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -87,10 +87,10 @@ class FC2IE(InfoExtractor): if 'err_code' in info: #raise ExtractorError('Error code: %s' % info['err_code'][0]) # most of the time we can still download wideo even if err_code is 403 or 602 - print 'Error code was: %s... but still trying' % info['err_code'][0] - + self._downloader.report_warning('Error code was: %s... but still trying' % info['err_code'][0]) + if 'filepath' not in info: - raise ExtractorError('No file path for download. Maybe not logged?') + raise ExtractorError('Cannot download file. Are you logged?') video_url = info['filepath'][0] + '?mid=' + info['mid'][0] title_info = info.get('title') From 5a000b45b339f6516f2a5a3bdfd2869713e8438a Mon Sep 17 00:00:00 2001 From: h-collector Date: Mon, 7 Jul 2014 22:47:28 +0200 Subject: [PATCH 0006/6219] Don't use report_warning for reporting warnings In tests warning is converted to error --- youtube_dl/extractor/fc2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 0933485e0..3073ab0d4 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -33,7 +33,7 @@ class FC2IE(InfoExtractor): def _login(self): (username, password) = self._get_login_info() if (username is None) or (password is None): - self._downloader.report_warning('unable to log in: will be downloading in non authorized mode') + self.to_screen('unable to log in: will be downloading in non authorized mode') # report_warning return False # Log in @@ -53,7 +53,7 @@ class FC2IE(InfoExtractor): login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') if 'mode=redirect&login=done' not in login_results: - self._downloader.report_warning('unable to log in: bad username or password') + self.to_screen('unable to log in: bad username or password') # report_warning return False # this is also needed @@ -87,7 +87,7 @@ class FC2IE(InfoExtractor): if 'err_code' in info: #raise ExtractorError('Error code: %s' % info['err_code'][0]) # most of the time we can still download wideo even if err_code is 403 or 602 - self._downloader.report_warning('Error code was: %s... but still trying' % info['err_code'][0]) + self.to_screen('Error code was: %s... but still trying' % info['err_code'][0]) # report_warning if 'filepath' not in info: raise ExtractorError('Cannot download file. Are you logged?') From 3d5f7a3947a8d304bc7ad46217f171996e95c475 Mon Sep 17 00:00:00 2001 From: Johannes Knoedtel Date: Mon, 12 Jan 2015 22:26:20 +0100 Subject: [PATCH 0007/6219] [utils] Prevent override of custom headers. The dict of headers of request objects in urllib has its keys always capitalized. This causes the lookup to fail and overwrite the header. If for example a Extractor tries to add a "User-Agent" header the internal representation in the request object is "User-agent". The header is therefore clobbered by the "User-Agent" in std_headers, because the strings are not equal. --- youtube_dl/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 98732e8e9..daf94abd1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -611,7 +611,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): def http_request(self, req): for h, v in std_headers.items(): - if h not in req.headers: + # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 + # The dict keys are capitalized because of this bug by urllib + if h.capitalize() not in req.headers: req.add_header(h, v) if 'Youtubedl-no-compression' in req.headers: if 'Accept-encoding' in req.headers: From ba319696a99f34342a321d23bb41d3d442a773cc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 17 Jan 2015 23:56:34 +0100 Subject: [PATCH 0008/6219] [options] Clarify that --password can be left out (#4723) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index a30974efd..f25c12e52 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -264,7 +264,7 @@ def parseOpts(overrideArguments=None): authentication.add_option( '-p', '--password', dest='password', metavar='PASSWORD', - help='account password') + help='account password. If this option is left out, youtube-dl will ask interactively.') authentication.add_option( '-2', '--twofactor', dest='twofactor', metavar='TWOFACTOR', From fdb2ed7455d509a5f0b17ad5c1d721d5484bde8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Jan 2015 08:09:18 +0600 Subject: [PATCH 0009/6219] [abc7news] Add extractor (Closes #4734) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/abc7news.py | 68 ++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/abc7news.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0902eb437..2b9d4455d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .abc import ABCIE +from .abc7news import Abc7NewsIE from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE from .adobetv import AdobeTVIE diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py new file mode 100644 index 000000000..c04949c21 --- /dev/null +++ b/youtube_dl/extractor/abc7news.py @@ -0,0 +1,68 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_iso8601 + + +class Abc7NewsIE(InfoExtractor): + _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P[^/]+))?/(?P\d+)' + _TESTS = [ + { + 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', + 'info_dict': { + 'id': '472581', + 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', + 'ext': 'mp4', + 'title': 'East Bay museum celebrates history of synthesized music', + 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1421123075, + 'upload_date': '20150113', + 'uploader': 'Jonathan Bloom', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://abc7news.com/472581', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + m3u8 = self._html_search_meta( + 'contentURL', webpage, 'm3u8 url', fatal=True) + + formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') + self._sort_formats(formats) + + title = self._og_search_title(webpage).strip() + description = self._og_search_description(webpage).strip() + thumbnail = self._og_search_thumbnail(webpage) + timestamp = parse_iso8601(self._search_regex( + r'
\s*