From 03359e9864bfb925f577fa5b16c3ef22884127aa Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Sun, 3 Aug 2014 07:34:04 -0700 Subject: [PATCH 001/327] Added --sleep-interval option --- youtube_dl/__init__.py | 8 ++++++++ youtube_dl/downloader/common.py | 3 +++ youtube_dl/utils.py | 3 +++ 3 files changed, 14 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 429630ce5..2bd5ec33b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -351,6 +351,8 @@ def parseOpts(overrideArguments=None): downloader.add_option('-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + downloader.add_option('--sleep-interval', + dest='sleepinterval', metavar='SLEEPINTERVAL', help='number of seconds to sleep between downloads (default is %default)', default="0") downloader.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) downloader.add_option('--buffer-size', @@ -671,6 +673,11 @@ def _real_main(argv=None): if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit + if opts.sleepinterval is not None: + try: + opts.sleepinterval = abs(int(opts.sleepinterval)) + except ValueError: + parser.error(u'invalid sleep interval specified') if opts.min_filesize is not None: numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) if numeric_limit is None: @@ -767,6 +774,7 @@ def _real_main(argv=None): 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, + 'sleepinterval': opts.sleepinterval, 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'buffersize': opts.buffersize, diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 917f3450e..8e0e386bf 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -278,6 +278,9 @@ class FileDownloader(object): """Download to a filename using the info from info_dict Return True on success and False otherwise """ + sleep_interval = self.params.get('sleepinterval', 0) + self.to_screen(u'[download] Sleeping %d seconds...' 
%sleep_interval) + time.sleep(sleep_interval) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e40b367c2..d199d26d2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,6 +6,7 @@ import codecs import contextlib import ctypes import datetime +import time import email.utils import errno import getpass @@ -747,6 +748,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] + #print("sleeping\n") + #time.sleep(1) return req def http_response(self, req, resp): From 2f61fe4cccc1ef4186943f4eed2e89f8fe2e2c23 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Sun, 3 Aug 2014 07:38:04 -0700 Subject: [PATCH 002/327] Removed unneccesary changes to utils.py --- youtube_dl/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d199d26d2..e40b367c2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,7 +6,6 @@ import codecs import contextlib import ctypes import datetime -import time import email.utils import errno import getpass @@ -748,8 +747,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] - #print("sleeping\n") - #time.sleep(1) return req def http_response(self, req, resp): From a42c9215983c4d62d1c000c9dede6e0850dbb5e4 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Wed, 13 Aug 2014 04:38:40 -0700 Subject: [PATCH 003/327] Removed sleep and sleep output when interval is zero --- youtube_dl/downloader/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 48e829deb..c1da065b5 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -279,8 +279,9 @@ class FileDownloader(object): Return True on success and False otherwise """ sleep_interval = self.params.get('sleepinterval', 0) - self.to_screen(u'[download] Sleeping %d seconds...' %sleep_interval) - time.sleep(sleep_interval) + if sleep_interval > 0: + self.to_screen(u'[download] Sleeping %d seconds...' 
%sleep_interval) + time.sleep(sleep_interval) # Check file already present if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): self.report_file_already_downloaded(filename) From 4231235cdaa18d050f3119a80ac409138fb8e8bd Mon Sep 17 00:00:00 2001 From: h-collector Date: Sun, 6 Jul 2014 01:42:41 +0200 Subject: [PATCH 004/327] Fix issues with fc2 Fix issues #2912 and #3171 --- youtube_dl/extractor/fc2.py | 49 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c663a0f81..ecfb233f9 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -7,14 +7,16 @@ import hashlib from .common import InfoExtractor from ..utils import ( ExtractorError, + compat_urllib_parse, compat_urllib_request, compat_urlparse, ) class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/((?P[^/]+)/)?content/(?P[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/((?P[^/]+)/)?(a/)?content/(?P[^/]+)' IE_NAME = 'fc2' + _NETRC_MACHINE = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', 'md5': 'a6ebe8ebe0396518689d963774a54eb7', @@ -25,17 +27,53 @@ class FC2IE(InfoExtractor): }, } + #def _real_initialize(self): + # self._login() + + def _login(self): + (username, password) = self._get_login_info() + if (username is None) or (password is None): + self._downloader.report_warning('unable to log in: will be downloading in non authorized mode') + return False + + # Log in + login_form_strs = { + 'email': username, + 'password': password, + 'done': 'video', + 'Submit': ' Login ', + } + + # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode + # chokes on unicode + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) + login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') + request = compat_urllib_request.Request( + 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) + + login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') + if 'mode=redirect&login=done' not in login_results: + self._downloader.report_warning('unable to log in: bad username or password') + return False + + # this is also needed + login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done') + redir_res = self._download_webpage(login_redir, None, note='Login redirect', errnote='Something is not right') + + return True + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) - refer = url.replace('/content/', '/a/content/') + refer = (url if '/a/content/' in url else url.replace('/content/', '/a/content/')); mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() info_url = ( @@ -47,7 +85,12 @@ class FC2IE(InfoExtractor): info = compat_urlparse.parse_qs(info_webpage) if 'err_code' in info: - raise ExtractorError('Error code: %s' % info['err_code'][0]) + #raise ExtractorError('Error code: %s' % info['err_code'][0]) + # most of the time we can still download wideo even if err_code is 403 or 602 + print 'Error code was: %s... 
but still trying' % info['err_code'][0] + + if 'filepath' not in info: + raise ExtractorError('No file path for download. Maybe not logged?') video_url = info['filepath'][0] + '?mid=' + info['mid'][0] title_info = info.get('title') From 40b1cbafacea338e51d43fa78438eaf21a1bcbcd Mon Sep 17 00:00:00 2001 From: h-collector Date: Sun, 6 Jul 2014 01:48:07 +0200 Subject: [PATCH 005/327] Update fc2.py --- youtube_dl/extractor/fc2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index ecfb233f9..0933485e0 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -87,10 +87,10 @@ class FC2IE(InfoExtractor): if 'err_code' in info: #raise ExtractorError('Error code: %s' % info['err_code'][0]) # most of the time we can still download wideo even if err_code is 403 or 602 - print 'Error code was: %s... but still trying' % info['err_code'][0] - + self._downloader.report_warning('Error code was: %s... but still trying' % info['err_code'][0]) + if 'filepath' not in info: - raise ExtractorError('No file path for download. Maybe not logged?') + raise ExtractorError('Cannot download file. Are you logged?') video_url = info['filepath'][0] + '?mid=' + info['mid'][0] title_info = info.get('title') From 5a000b45b339f6516f2a5a3bdfd2869713e8438a Mon Sep 17 00:00:00 2001 From: h-collector Date: Mon, 7 Jul 2014 22:47:28 +0200 Subject: [PATCH 006/327] Don't use report_warning for reporting warnings In tests warning is converted to error --- youtube_dl/extractor/fc2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 0933485e0..3073ab0d4 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -33,7 +33,7 @@ class FC2IE(InfoExtractor): def _login(self): (username, password) = self._get_login_info() if (username is None) or (password is None): - self._downloader.report_warning('unable to log in: will be downloading in non authorized mode') + self.to_screen('unable to log in: will be downloading in non authorized mode') # report_warning return False # Log in @@ -53,7 +53,7 @@ class FC2IE(InfoExtractor): login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') if 'mode=redirect&login=done' not in login_results: - self._downloader.report_warning('unable to log in: bad username or password') + self.to_screen('unable to log in: bad username or password') # report_warning return False # this is also needed @@ -87,7 +87,7 @@ class FC2IE(InfoExtractor): if 'err_code' in info: #raise ExtractorError('Error code: %s' % info['err_code'][0]) # most of the time we can still download wideo even if err_code is 403 or 602 - self._downloader.report_warning('Error code was: %s... but still trying' % info['err_code'][0]) + self.to_screen('Error code was: %s... but still trying' % info['err_code'][0]) # report_warning if 'filepath' not in info: raise ExtractorError('Cannot download file. Are you logged?') From d79323136fabc2cd72afc7c124e17797e32df514 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 11:15:18 +0100 Subject: [PATCH 007/327] [utils] Simplify HTTPS socket creation We were duplicating (bad) code and doing crazy things with SSL. Just use TLSv1 across the board, and do with one implementation of HTTPSConnection.connect. Fixes #4696. 
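In rough terms, the connect path everything now funnels through is: create the TCP connection first, then wrap it exactly once with an explicitly pinned protocol version, instead of keeping a duplicated HTTPSConnection subclass with SSLv23/TLSv1 fallback logic. A minimal standalone sketch of that idea (the host, port and timeout below are placeholders, not values from this patch):

    import socket
    import ssl

    # Open the plain TCP connection first...
    sock = socket.create_connection(('example.org', 443), timeout=10)
    # ...then wrap it once, pinning the protocol version explicitly
    # (mirrors the ssl.wrap_socket(..., ssl_version=ssl.PROTOCOL_TLSv1)
    # call added to _create_http_connection in the hunk below).
    tls_sock = ssl.wrap_socket(sock, ssl_version=ssl.PROTOCOL_TLSv1)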
--- youtube_dl/utils.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3536a5bd6..4be323926 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -411,25 +411,9 @@ def make_HTTPS_handler(params, **kwargs): pass if sys.version_info < (3, 2): - import httplib - - class HTTPSConnectionV3(httplib.HTTPSConnection): - def __init__(self, *args, **kwargs): - httplib.HTTPSConnection.__init__(self, *args, **kwargs) - - def connect(self): - sock = socket.create_connection((self.host, self.port), self.timeout) - if getattr(self, '_tunnel_host', False): - self.sock = sock - self._tunnel() - try: - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1) - except ssl.SSLError: - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23) - - return YoutubeDLHTTPSHandler(params, https_conn_class=HTTPSConnectionV3, **kwargs) + return YoutubeDLHTTPSHandler(params, **kwargs) else: # Python < 3.4 - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) context.verify_mode = (ssl.CERT_NONE if opts_no_check_certificate else ssl.CERT_REQUIRED) @@ -560,7 +544,9 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): sock = compat_socket_create_connection( (self.host, self.port), self.timeout, sa) if is_https: - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) + self.sock = ssl.wrap_socket( + sock, self.key_file, self.cert_file, + ssl_version=ssl.PROTOCOL_TLSv1) else: self.sock = sock hc.connect = functools.partial(_hc_connect, hc) From ba551681573aafedf9a0a234a9522cc06b12a4b7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 11:20:24 +0100 Subject: [PATCH 008/327] release 2015.01.23.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 28458fd69..674c2f279 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.23.1' +__version__ = '2015.01.23.2' From 501f13fbf3d1f7225f91e3e0ad008df2cd3219f1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 12:00:25 +0100 Subject: [PATCH 009/327] [generic] Add support for Cinerama player (Fixes #4752) --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b893d8149..a028c4ed4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -489,6 +489,16 @@ class GenericIE(InfoExtractor): 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', } }, + # Cinerama player + { + 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm', + 'info_dict': { + 'id': '730m_DandD_1901_512k', + 'ext': 'mp4', + 'uploader': 'www.abc.net.au', + 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', + } + } ] def report_following_redirect(self, new_url): @@ -1046,6 +1056,10 @@ class GenericIE(InfoExtractor): \s*{[^}]+? 
["']?clip["']?\s*:\s*\{\s* ["']?url["']?\s*:\s*["']([^"']+)["'] ''', webpage)) + if not found: + # Cinerama player + found = re.findall( + r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage) if not found: # Try to find twitter cards info found = filter_video(re.findall( From 649f7966f73b215f02b9c4db5a07d8c4b7e17acb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 12:07:13 +0100 Subject: [PATCH 010/327] Fix --sleep-interval (#3426) --- youtube_dl/__init__.py | 6 ------ youtube_dl/options.py | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ea1660452..7bd7295e2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -132,11 +132,6 @@ def _real_main(argv=None): if numeric_limit is None: parser.error('invalid rate limit specified') opts.ratelimit = numeric_limit - if opts.sleepinterval is not None: - try: - opts.sleepinterval = abs(int(opts.sleepinterval)) - except ValueError: - parser.error(u'invalid sleep interval specified') if opts.min_filesize is not None: numeric_limit = FileDownloader.parse_bytes(opts.min_filesize) if numeric_limit is None: @@ -272,7 +267,6 @@ def _real_main(argv=None): 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, - 'sleepinterval': opts.sleepinterval, 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'buffersize': opts.buffersize, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 12c9826f8..262c60013 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -423,7 +423,7 @@ def parseOpts(overrideArguments=None): help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') workarounds.add_option( '--sleep-interval', metavar='SECONDS', - dest='sleep_interval', + dest='sleep_interval', type=float, help='Number of seconds to sleep before each download.') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From aa42e87340e491d0b151e9dad368711fc275c7b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 12:17:12 +0100 Subject: [PATCH 011/327] [utils] Catch strange Windows errors (Closes #4733) --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4be323926..463cc20ff 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -863,6 +863,9 @@ def _windows_write_string(s, out): except AttributeError: # If the output stream doesn't have a fileno, it's virtual return False + except io.UnsupportedOperation: + # Some strange Windows pseudo files? + return False if fileno not in WIN_OUTPUT_IDS: return False From 6f58db89820fe03effb2014ee70e1ffd0b1f3017 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 12:17:19 +0100 Subject: [PATCH 012/327] release 2015.01.23.3 --- README.md | 2 ++ youtube_dl/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1408ebba0..869523720 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,8 @@ which means you can modify it, redistribute it or use it however you like. --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH + --sleep-interval SECONDS Number of seconds to sleep before each + download. 
## Video Format Options: -f, --format FORMAT video format code, specify the order of diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 674c2f279..f39b94361 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.23.2' +__version__ = '2015.01.23.3' From bf7fa94ec7202bde963a75ab903996ac575910db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Jan 2015 16:31:52 +0100 Subject: [PATCH 013/327] [downloader/f4m] build_fragments_list: Support videos with more than 1 segment --- youtube_dl/downloader/f4m.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index c460c167a..c68b2c303 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -177,13 +177,12 @@ def build_fragments_list(boot_info): """ Return a list of (segment, fragment) for each fragment in the video """ res = [] segment_run_table = boot_info['segments'][0] - # I've only found videos with one segment - segment_run_entry = segment_run_table['segment_run'][0] - n_frags = segment_run_entry[1] fragment_run_entry_table = boot_info['fragments'][0]['fragments'] first_frag_number = fragment_run_entry_table[0]['first'] - for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)): - res.append((1, frag_number)) + fragments_counter = itertools.count(first_frag_number) + for segment, fragments_count in segment_run_table['segment_run']: + for _ in range(fragments_count): + res.append((segment, next(fragments_counter))) return res From 0920e5830f890580ec16cdd10bfe8def73a1a09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Jan 2015 16:39:23 +0100 Subject: [PATCH 014/327] [atresplayer] Don't include f4m formats if they are protected by DRM (fixes #4705) --- youtube_dl/extractor/atresplayer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 5db1941b3..37321ef1d 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -107,7 +107,14 @@ class AtresPlayerIE(InfoExtractor): for _, video_url in fmt_json['resultObject'].items(): if video_url.endswith('/Manifest'): - formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id)) + if 'geodeswowsmpra3player' in video_url: + f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] + f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) + # this videos are protected by DRM, the f4m downloader doesn't support them + continue + else: + f4m_url = video_url[:-9] + '/manifest.f4m' + formats.extend(self._extract_f4m_formats(f4m_url, video_id)) else: formats.append({ 'url': video_url, From c2e64f71d075bbc916f343916ff1e679f642a821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Jan 2015 21:58:40 +0600 Subject: [PATCH 015/327] [twitch] Add support for bookmarks --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/twitch.py | 41 ++++++++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9ab90ac62..9e1ce5db3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -467,6 +467,7 @@ from .twitch import ( TwitchVodIE, 
TwitchProfileIE, TwitchPastBroadcastsIE, + TwitchBookmarksIE, TwitchStreamIE, ) from .ubu import UbuIE diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 340cadcf5..741df7cbc 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -220,12 +220,18 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): response = self._download_json( self._PLAYLIST_URL % (channel_id, offset, limit), channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter)) - videos = response['videos'] - if not videos: + page_entries = self._extract_playlist_page(response) + if not page_entries: break - entries.extend([self.url_result(video['url']) for video in videos]) + entries.extend(page_entries) offset += limit - return self.playlist_result(entries, channel_id, channel_name) + return self.playlist_result( + [self.url_result(entry) for entry in set(entries)], + channel_id, channel_name) + + def _extract_playlist_page(self, response): + videos = response.get('videos') + return [video['url'] for video in videos] if videos else [] def _real_extract(self, url): return self._extract_playlist(self._match_id(url)) @@ -262,6 +268,31 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): } +class TwitchBookmarksIE(TwitchPlaylistBaseIE): + IE_NAME = 'twitch:bookmarks' + _VALID_URL = r'%s/(?P[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE + _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE + _PLAYLIST_TYPE = 'bookmarks' + + _TEST = { + 'url': 'http://www.twitch.tv/ognos/profile/bookmarks', + 'info_dict': { + 'id': 'ognos', + 'title': 'Ognos', + }, + 'playlist_mincount': 3, + } + + def _extract_playlist_page(self, response): + entries = [] + for bookmark in response.get('bookmarks', []): + video = bookmark.get('video') + if not video: + continue + entries.append(video['url']) + return entries + + class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'%s/(?P[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE @@ -348,4 +379,4 @@ class TwitchStreamIE(TwitchBaseIE): 'view_count': view_count, 'formats': formats, 'is_live': True, - } + } \ No newline at end of file From e793f7671c3e303e4fbe2c4cd18e80ad4b5e5417 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 17:09:26 +0100 Subject: [PATCH 016/327] [liveleak] Modernize --- youtube_dl/extractor/liveleak.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index b04be1e8c..e56a4d6b1 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class LiveLeakIE(InfoExtractor): - _VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P[\w_]+)(?:.*)' + _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P[\w_]+)(?:.*)' _TESTS = [{ 'url': 'http://www.liveleak.com/view?i=757_1364311680', 'md5': '0813c2430bea7a46bf13acf3406992f4', @@ -43,8 +43,7 @@ class LiveLeakIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip() From ebd46aed5119899826629cf751ba5abe7a65d50b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Jan 2015 22:21:55 +0600 Subject: [PATCH 017/327] [atresplayer] 
Filter URLs and clarify android format ids --- youtube_dl/extractor/atresplayer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 37321ef1d..8fd69b971 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -105,7 +105,9 @@ class AtresPlayerIE(InfoExtractor): raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, result), expected=True) - for _, video_url in fmt_json['resultObject'].items(): + for format_id, video_url in fmt_json['resultObject'].items(): + if format_id == 'token' or not video_url.startswith('http'): + continue if video_url.endswith('/Manifest'): if 'geodeswowsmpra3player' in video_url: f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] @@ -118,7 +120,7 @@ class AtresPlayerIE(InfoExtractor): else: formats.append({ 'url': video_url, - 'format_id': 'android', + 'format_id': 'android-%s' % format_id, 'preference': 1, }) self._sort_formats(formats) From 26e274666d285026dfb6033cdad74cc2a108e2bc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 17:22:14 +0100 Subject: [PATCH 018/327] [liveleak] Add original videos (Fixes #4768) --- youtube_dl/extractor/liveleak.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index e56a4d6b1..35822067f 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -11,17 +11,17 @@ class LiveLeakIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P[\w_]+)(?:.*)' _TESTS = [{ 'url': 'http://www.liveleak.com/view?i=757_1364311680', - 'md5': '0813c2430bea7a46bf13acf3406992f4', + 'md5': '50f79e05ba149149c1b4ea961223d5b3', 'info_dict': { 'id': '757_1364311680', - 'ext': 'mp4', + 'ext': 'flv', 'description': 'extremely bad day for this guy..!', 'uploader': 'ljfriel2', 'title': 'Most unlucky car accident' } }, { 'url': 'http://www.liveleak.com/view?i=f93_1390833151', - 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf', + 'md5': 'b13a29626183c9d33944e6a04f41aafc', 'info_dict': { 'id': 'f93_1390833151', 'ext': 'mp4', @@ -80,9 +80,19 @@ class LiveLeakIE(InfoExtractor): sources = json.loads(sources_json) formats = [{ + 'format_id': '%s' % i, 'format_note': s.get('label'), 'url': s['file'], - } for s in sources] + } for i, s in enumerate(sources)] + for i, s in enumerate(sources): + orig_url = s['file'].replace('.h264_base.mp4', '') + if s['file'] != orig_url: + formats.append({ + 'format_id': 'original-%s' % i, + 'format_note': s.get('label'), + 'url': orig_url, + 'preference': 1, + }) self._sort_formats(formats) return { From 0c17278843a30d4aea1f8334985e6fbae17000d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 23 Jan 2015 22:54:29 +0600 Subject: [PATCH 019/327] [atresplayer] Extract subtitles --- youtube_dl/extractor/atresplayer.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 8fd69b971..f42862be3 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import time import hmac -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor from ..compat import ( compat_str, compat_urllib_parse, @@ -17,7 +17,7 @@ from ..utils import ( ) -class AtresPlayerIE(InfoExtractor): 
+class AtresPlayerIE(SubtitlesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P.+?)_\d+\.html' _TESTS = [ { @@ -143,6 +143,15 @@ class AtresPlayerIE(InfoExtractor): description = xpath_text(art, './description', 'description') thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') + subtitles = {} + subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') + if subtitle: + subtitles['es'] = subtitle + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + return { 'id': video_id, 'title': title, @@ -150,4 +159,5 @@ class AtresPlayerIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'subtitles': self.extract_subtitles(video_id, subtitles), } From 62cd676c7474f696804eda653558ada94c5953a0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 18:39:12 +0100 Subject: [PATCH 020/327] [youtube] Fixup DASH m4a headers This fixes #2288, #2506, #2607, #3681, #4741, #4767. --- youtube_dl/YoutubeDL.py | 28 +++++++++++++++++++++++----- youtube_dl/extractor/youtube.py | 6 +++--- youtube_dl/postprocessor/__init__.py | 2 ++ youtube_dl/postprocessor/ffmpeg.py | 20 +++++++++++++++++++- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8f34b17b4..521e4055e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -73,6 +73,7 @@ from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegMergerPP, FFmpegPostProcessor, @@ -213,7 +214,7 @@ class YoutubeDL(object): - "never": do nothing - "warn": only emit a warning - "detect_or_warn": check whether we can do anything - about it, warn otherwise + about it, warn otherwise (default) source_address: (Experimental) Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. @@ -1219,11 +1220,12 @@ class YoutubeDL(object): if success: # Fixup content + fixup_policy = self.params.get('fixup') + if fixup_policy is None: + fixup_policy = 'detect_or_warn' + stretched_ratio = info_dict.get('stretched_ratio') if stretched_ratio is not None and stretched_ratio != 1: - fixup_policy = self.params.get('fixup') - if fixup_policy is None: - fixup_policy = 'detect_or_warn' if fixup_policy == 'warn': self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( info_dict['id'], stretched_ratio)) @@ -1237,7 +1239,23 @@ class YoutubeDL(object): '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( info_dict['id'], stretched_ratio)) else: - assert fixup_policy == 'ignore' + assert fixup_policy in ('ignore', 'never') + + if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if fixup_policy == 'warn': + self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM4aPP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' 
% ( + info_dict['id'])) + else: + assert fixup_policy in ('ignore', 'never') try: self.post_process(filename, info_dict) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2a1f8be0a..eb55d24ce 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'}, # Dash mp4 audio - '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50}, - '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50}, - '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50}, + '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, + '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, + '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index f8507951c..0ffbca258 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -7,6 +7,7 @@ from .ffmpeg import ( FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, FFmpegFixupStretchedPP, + FFmpegFixupM4aPP, FFmpegMergerPP, FFmpegMetadataPP, FFmpegVideoConvertorPP, @@ -25,6 +26,7 @@ __all__ = [ 'FFmpegAudioFixPP', 'FFmpegEmbedSubtitlePP', 'FFmpegExtractAudioPP', + 'FFmpegFixupM4aPP', 'FFmpegFixupStretchedPP', 'FFmpegMergerPP', 'FFmpegMetadataPP', diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index faccdc43d..855d1e6db 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -564,7 +564,7 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor): def run(self, info): stretched_ratio = info.get('stretched_ratio') if stretched_ratio is None or stretched_ratio == 1: - return + return True, info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -577,3 +577,21 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor): os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return True, info + + +class FFmpegFixupM4aPP(FFmpegPostProcessor): + def run(self, info): + if info.get('container') != 'm4a_dash': + return True, info + + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + + options = ['-c', 'copy', '-f', 'mp4'] + self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + return True, info From 6896a52721ce3a0dc4f33fa5dda508c30c56ce22 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 18:58:32 +0100 Subject: [PATCH 021/327] release 2015.01.23.4 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index f39b94361..35f3e1b6b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.23.3' +__version__ = '2015.01.23.4' From 3e055aa5c3c069476b1ac9fc103dc1386cad763c Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Fri, 23 Jan 2015 21:22:49 +0200 Subject: [PATCH 022/327] [cliphunter] Fix extraction and update test (Fixes #4362) --- youtube_dl/extractor/cliphunter.py | 56 ++++++++++++------------------ 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 2edab90a3..d46592cc5 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor +from ..utils import determine_ext _translation_table = { @@ -27,10 +25,10 @@ class CliphunterIE(InfoExtractor): ''' _TEST = { 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', - 'md5': 'a2ba71eebf523859fe527a61018f723e', + 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', 'info_dict': { 'id': '1012420', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Fun Jynx Maze solo', 'thumbnail': 're:^https?://.*\.jpg$', 'age_limit': 18, @@ -44,39 +42,31 @@ class CliphunterIE(InfoExtractor): video_title = self._search_regex( r'mediaTitle = "([^"]+)"', webpage, 'title') - pl_fiji = self._search_regex( - r'pl_fiji = \'([^\']+)\'', webpage, 'video data') - pl_c_qual = self._search_regex( - r'pl_c_qual = "(.)"', webpage, 'video quality') - video_url = _decode(pl_fiji) - formats = [{ - 'url': video_url, - 'format_id': 'default-%s' % pl_c_qual, - }] + fmts = {} + for fmt in ('mp4', 'flv'): + fmt_list = self._parse_json(self._search_regex( + r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id) + for f in fmt_list: + fmts[f['fname']] = _decode(f['sUrl']) - qualities_json = self._search_regex( - r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info') - qualities_data = json.loads(qualities_json) + qualities = self._parse_json(self._search_regex( + r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id) - for i, t in enumerate( - re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)): - quality_id, crypted_url = t - video_url = _decode(crypted_url) + formats = [] + for fname, url in fmts.items(): f = { - 'format_id': quality_id, - 'url': video_url, - 'quality': i, + 'url': url, } - if quality_id in qualities_data: - qd = qualities_data[quality_id] - m = re.match( - r'''(?x)(?P[0-9]+)x(?P[0-9]+)<\\/b> - \s*\(\s*(?P[0-9]+)\s*kb\\/s''', qd) - if m: - f['width'] = int(m.group('width')) - f['height'] = int(m.group('height')) - f['tbr'] = int(m.group('tbr')) + if fname in qualities: + qual = qualities[fname] + f.update({ + 'format_id': '%s_%sp' % (determine_ext(url), qual['h']), + 'width': qual['w'], + 'height': qual['h'], + 'tbr': qual['br'], + }) formats.append(f) + self._sort_formats(formats) thumbnail = self._search_regex( From 48f00d15b1bdbe05be397849f6659fe5e0daa9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Jan 2015 21:03:00 +0100 Subject: [PATCH 023/327] [auengine] Remove extractor The test is probably infringing copyright and nobody has provided a new test (see #4643). 
--- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/auengine.py | 50 -------------------------------- 2 files changed, 51 deletions(-) delete mode 100644 youtube_dl/extractor/auengine.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9e1ce5db3..3f7ca6f7d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -29,7 +29,6 @@ from .arte import ( from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .audiomack import AudiomackIE, AudiomackAlbumIE -from .auengine import AUEngineIE from .azubu import AzubuIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py deleted file mode 100644 index a1b666be0..000000000 --- a/youtube_dl/extractor/auengine.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_urllib_parse -from ..utils import ( - determine_ext, - ExtractorError, - remove_end, -) - - -class AUEngineIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P[^&]+).*?' - - _TEST = { - 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', - 'md5': '48972bdbcf1a3a2f5533e62425b41d4f', - 'info_dict': { - 'id': 'lfvlytY6', - 'ext': 'mp4', - 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'\s*(?P<title>.+?)\s*', webpage, 'title') - video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) - video_url = compat_urllib_parse.unquote(video_urls[0]) - thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) - thumbnail = compat_urllib_parse.unquote(thumbnails[0]) - - if not video_url: - raise ExtractorError('Could not find video URL') - - ext = '.' + determine_ext(video_url) - title = remove_end(title, ext) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', - } From 12d1fb5aa9aabfe48676d6c60bbd114cc53a513f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Jan 2015 21:05:07 +0100 Subject: [PATCH 024/327] [twitch] PEP8 --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 741df7cbc..87290d002 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -379,4 +379,4 @@ class TwitchStreamIE(TwitchBaseIE): 'view_count': view_count, 'formats': formats, 'is_live': True, - } \ No newline at end of file + } From fdaaaaa878c42975936bf7f6ecf39a97436fefe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Jan 2015 21:10:10 +0100 Subject: [PATCH 025/327] README: Recommend using flake8 instead of pyflake and pep8 separately --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 869523720..36b87444e 100644 --- a/README.md +++ b/README.md @@ -586,7 +586,7 @@ If you want to add support for a new site, you can follow this quick list (assum 5. 
Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. -8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). +8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/__init__.py From a055469fafe088b6aa0e569d989cbf7f70535951 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 23 Jan 2015 23:50:31 +0100 Subject: [PATCH 026/327] [downloader] Improve downloader selection --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/downloader/__init__.py | 31 +++++++++++++++---------------- youtube_dl/utils.py | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 521e4055e..e61e6c2a7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1179,7 +1179,7 @@ class YoutubeDL(object): if not self.params.get('skip_download', False): try: def dl(name, info): - fd = get_suitable_downloader(info)(self, self.params) + fd = get_suitable_downloader(info, self.params)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) if self.params.get('verbose'): diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 31e28df58..2aca3cab5 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -9,27 +9,26 @@ from .rtmp import RtmpFD from .f4m import F4mFD from ..utils import ( - determine_ext, + determine_protocol, ) +PROTOCOL_MAP = { + 'rtmp': RtmpFD, + 'm3u8_native': NativeHlsFD, + 'm3u8': HlsFD, + 'mms': MplayerFD, + 'rtsp': MplayerFD, + 'f4m': F4mFD, +} -def get_suitable_downloader(info_dict): + +def get_suitable_downloader(info_dict, params={}): """Get the downloader class that can handle the info dict.""" - url = info_dict['url'] - protocol = info_dict.get('protocol') + protocol = determine_protocol(info_dict) + info_dict['protocol'] = protocol + + return PROTOCOL_MAP.get(protocol, HttpFD) - if url.startswith('rtmp'): - return RtmpFD - if protocol == 'm3u8_native': - return NativeHlsFD - if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'): - return HlsFD - if url.startswith('mms') or url.startswith('rtsp'): - return MplayerFD - if determine_ext(url) == 'f4m': - return F4mFD - else: - return HttpFD __all__ = [ 'get_suitable_downloader', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py 
index 463cc20ff..2970d02a1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1642,3 +1642,25 @@ def is_html(first_bytes): s = first_bytes.decode('utf-8', 'replace') return re.match(r'^\s*<', s) + + +def determine_protocol(info_dict): + protocol = info_dict.get('protocol') + if protocol is not None: + return protocol + + url = info_dict['url'] + if url.startswith('rtmp'): + return 'rtmp' + elif url.startswith('mms'): + return 'mms' + elif url.startswith('rtsp'): + return 'rtsp' + + ext = determine_ext(url) + if ext == 'm3u8': + return 'm3u8' + elif ext == 'f4m': + return 'f4m' + + return compat_urllib_parse_urlparse(url).scheme From 222516d97d5ff9e62f3a9860fe2e65aa99c001b3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Jan 2015 01:38:48 +0100 Subject: [PATCH 027/327] [downloader] Lay groundwork for external downloaders. This comes with a very simply implementation for wget; the real work is in setting up the infrastructure. --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/downloader/__init__.py | 9 +- youtube_dl/downloader/common.py | 21 +++++ youtube_dl/downloader/external.py | 131 ++++++++++++++++++++++++++++++ youtube_dl/downloader/rtmp.py | 14 +--- youtube_dl/options.py | 6 ++ 7 files changed, 169 insertions(+), 14 deletions(-) create mode 100644 youtube_dl/downloader/external.py diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e61e6c2a7..54e732943 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -219,6 +219,7 @@ class YoutubeDL(object): call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. + external_downloader: Executable of the external downloader to call. 
The following parameters are not used by YoutubeDL itself, they are used by diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7bd7295e2..3fc7dc5c2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -330,6 +330,7 @@ def _real_main(argv=None): 'source_address': opts.source_address, 'call_home': opts.call_home, 'sleep_interval': opts.sleep_interval, + 'external_downloader': opts.external_downloader, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 2aca3cab5..eff1122c5 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals from .common import FileDownloader +from .external import get_external_downloader +from .f4m import F4mFD from .hls import HlsFD from .hls import NativeHlsFD from .http import HttpFD from .mplayer import MplayerFD from .rtmp import RtmpFD -from .f4m import F4mFD from ..utils import ( determine_protocol, @@ -27,6 +28,12 @@ def get_suitable_downloader(info_dict, params={}): protocol = determine_protocol(info_dict) info_dict['protocol'] = protocol + external_downloader = params.get('external_downloader') + if external_downloader is not None: + ed = get_external_downloader(external_downloader) + if ed.supports(info_dict): + return ed + return PROTOCOL_MAP.get(protocol, HttpFD) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 82c917d92..c35c42c1d 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -325,3 +325,24 @@ class FileDownloader(object): # See YoutubeDl.py (search for progress_hooks) for a description of # this interface self._progress_hooks.append(ph) + + def _debug_cmd(self, args, subprocess_encoding, exe=None): + if not self.params.get('verbose', False): + return + + if exe is None: + exe = os.path.basename(args[0]) + + if subprocess_encoding: + str_args = [ + a.decode(subprocess_encoding) if isinstance(a, bytes) else a + for a in args] + else: + str_args = args + try: + import pipes + shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) + except ImportError: + shell_quote = repr + self.to_screen('[debug] %s command line: %s' % ( + exe, shell_quote(str_args))) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py new file mode 100644 index 000000000..c05596255 --- /dev/null +++ b/youtube_dl/downloader/external.py @@ -0,0 +1,131 @@ +from __future__ import unicode_literals + +import os.path +import subprocess +import sys + +from .common import FileDownloader +from ..utils import ( + encodeFilename, + std_headers, +) + + +class ExternalFD(FileDownloader): + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + retval = self._call_downloader(tmpfilename, info_dict) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % ( + self.get_basename(), retval)) + return False + + @classmethod + def get_basename(cls): + return cls.__name__[:-2].lower() + + @property + def exe(self): + return self.params.get('external_downloader') + + @classmethod + def 
supports(cls, info_dict): + return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') + + def _calc_headers(self, info_dict): + res = std_headers.copy() + + ua = info_dict.get('user_agent') + if ua is not None: + res['User-Agent'] = ua + + cookies = self._calc_cookies(info_dict) + if cookies: + res['Cookie'] = cookies + + return res + + def _calc_cookies(self, info_dict): + class _PseudoRequest(object): + def __init__(self, url): + self.url = url + self.headers = {} + self.unverifiable = False + + def add_unredirected_header(self, k, v): + self.headers[k] = v + + def get_full_url(self): + return self.url + + def is_unverifiable(self): + return self.unverifiable + + def has_header(self, h): + return h in self.headers + + pr = _PseudoRequest(info_dict['url']) + self.ydl.cookiejar.add_cookie_header(pr) + return pr.headers.get('Cookie') + + def _call_downloader(self, tmpfilename, info_dict): + """ Either overwrite this or implement _make_cmd """ + cmd = self._make_cmd(tmpfilename, info_dict) + + if sys.platform == 'win32' and sys.version_info < (3, 0): + # Windows subprocess module does not actually support Unicode + # on Python 2.x + # See http://stackoverflow.com/a/9951851/35070 + subprocess_encoding = sys.getfilesystemencoding() + cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd] + else: + subprocess_encoding = None + self._debug_cmd(cmd, subprocess_encoding) + + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + if p.returncode != 0: + self.to_stderr(stderr) + return p.returncode + + +class WgetFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] + for key, val in self._calc_headers(info_dict).items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--', info_dict['url']] + return cmd + + +_BY_NAME = dict( + (klass.get_basename(), klass) + for name, klass in globals().items() + if name.endswith('FD') and name != 'ExternalFD' +) + + +def list_external_downloaders(): + return sorted(_BY_NAME.keys()) + + +def get_external_downloader(external_downloader): + """ Given the name of the executable, see whether we support the given + downloader . 
""" + bn = os.path.basename(external_downloader) + return _BY_NAME[bn] diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 5346cb9a0..6dbbc053c 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -152,19 +152,7 @@ class RtmpFD(FileDownloader): else: subprocess_encoding = None - if self.params.get('verbose', False): - if subprocess_encoding: - str_args = [ - a.decode(subprocess_encoding) if isinstance(a, bytes) else a - for a in args] - else: - str_args = args - try: - import pipes - shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) - except ImportError: - shell_quote = repr - self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args)) + self._debug_cmd(args, subprocess_encoding, exe='rtmpdump') RD_SUCCESS = 0 RD_FAILED = 1 diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 262c60013..b38b8349f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -5,6 +5,7 @@ import optparse import shlex import sys +from .downloader.external import list_external_downloaders from .compat import ( compat_expanduser, compat_getenv, @@ -389,6 +390,11 @@ def parseOpts(overrideArguments=None): '--playlist-reverse', action='store_true', help='Download playlist videos in reverse order') + downloader.add_option( + '--external-downloader', + dest='external_downloader', metavar='COMMAND', + help='(experimental) Use the specified external downloader. ' + 'Currently supports %s' % ','.join(list_external_downloaders())) workarounds = optparse.OptionGroup(parser, 'Workarounds') workarounds.add_option( From 6aa4f54d6660bfb8fefe225e47cba7ade62fe564 Mon Sep 17 00:00:00 2001 From: Irfan Charania Date: Fri, 23 Jan 2015 17:41:07 -0800 Subject: [PATCH 028/327] [videott] improve extraction --- youtube_dl/extractor/videott.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index 1f938838c..6f8084140 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -13,7 +13,7 @@ from ..utils import ( class VideoTtIE(InfoExtractor): ID_NAME = 'video.tt' IE_DESC = 'video.tt - Your True Tube' - _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P[\da-zA-Z]{9})' + _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video\/|embed\/|watch_video\.php\?v=)(?P[\da-zA-Z]{9})' _TEST = { 'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8', From fc2d6abfe7e7a216825296001b889e5455e0412f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Jan 2015 16:11:40 +0600 Subject: [PATCH 029/327] [videott] Improve _VALID_URL and add test --- youtube_dl/extractor/videott.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index 6f8084140..ececc7ee0 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -13,9 +13,9 @@ from ..utils import ( class VideoTtIE(InfoExtractor): ID_NAME = 'video.tt' IE_DESC = 'video.tt - Your True Tube' - _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video\/|embed\/|watch_video\.php\?v=)(?P[\da-zA-Z]{9})' + _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P[\da-zA-Z]{9})' - _TEST = { + _TESTS = [{ 'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8', 'md5': 'b13aa9e2f267effb5d1094443dff65ba', 'info_dict': { @@ -26,7 +26,10 @@ class VideoTtIE(InfoExtractor): 'upload_date': '20130827', 'uploader': 'joseph313', } - 
} + }, { + 'url': 'http://video.tt/embed/amd5YujV8', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From b95aab8482881e8b1f7fba856da816a2dbc50d0f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Jan 2015 11:42:20 +0100 Subject: [PATCH 030/327] [youtube:truncated_url] Add x-yt-cl URLs (#4773) --- youtube_dl/extractor/youtube.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index eb55d24ce..b7b91f354 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1682,11 +1682,17 @@ class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:https?://)?[^/]+/watch\?(?: + (?:https?://)? + (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/ + (?:watch\?(?: feature=[a-z_]+| - annotation_id=annotation_[^&]+ - )?$| - (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ + annotation_id=annotation_[^&]+| + x-yt-cl=[0-9]+| + )? + | + attribution_link\?a=[^&]+ + ) + $ ''' _TESTS = [{ @@ -1695,6 +1701,12 @@ class YoutubeTruncatedURLIE(InfoExtractor): }, { 'url': 'http://www.youtube.com/watch?', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?feature=foo', + 'only_matching': True, }] def _real_extract(self, url): @@ -1710,7 +1722,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): class YoutubeTruncatedIDIE(InfoExtractor): IE_NAME = 'youtube:truncated_id' IE_DESC = False # Do not list - _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$' _TESTS = [{ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', From 384b62028a4c3c35727d714ccfc9944a36934069 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Jan 2015 13:33:45 +0100 Subject: [PATCH 031/327] [downloader/external] Add curl and aria2c (Closes #182) --- youtube_dl/downloader/external.py | 27 ++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index c05596255..7ebe40096 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -97,13 +97,22 @@ class ExternalFD(FileDownloader): self._debug_cmd(cmd, subprocess_encoding) p = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() + cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate() if p.returncode != 0: self.to_stderr(stderr) return p.returncode +class CurlFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-o', tmpfilename] + for key, val in self._calc_headers(info_dict).items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--', info_dict['url']] + return cmd + + class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] @@ -113,6 +122,20 @@ class WgetFD(ExternalFD): return cmd +class Aria2cFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = [ + self.exe, '-c', + '--min-split-size', '1M', '--max-connection-per-server', '4'] + dn = os.path.dirname(tmpfilename) + if dn: + cmd += ['--dir', dn] + cmd += ['--out', os.path.basename(tmpfilename)] + for key, val in 
self._calc_headers(info_dict).items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--', info_dict['url']] + return cmd + _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() From 3fcfb8e9faf3cf1dcadedd6fecc5158a86d07065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Jan 2015 18:07:21 +0100 Subject: [PATCH 032/327] [utils] YoutubeDLHandler: don't use 'Youtubedl-user-agent' for overriding the default user agent Setting the 'User-Agent' header is enough --- youtube_dl/downloader/http.py | 2 +- youtube_dl/extractor/atresplayer.py | 2 +- youtube_dl/extractor/bliptv.py | 2 +- youtube_dl/extractor/mtv.py | 2 +- youtube_dl/utils.py | 5 ----- 5 files changed, 4 insertions(+), 9 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index e68f20c9f..90a2e4c53 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -25,7 +25,7 @@ class HttpFD(FileDownloader): # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} if 'user_agent' in info_dict: - headers['Youtubedl-user-agent'] = info_dict['user_agent'] + headers['User-agent'] = info_dict['user_agent'] if 'http_referer' in info_dict: headers['Referer'] = info_dict['http_referer'] add_headers = info_dict.get('http_headers') diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index f42862be3..f016368fa 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -95,7 +95,7 @@ class AtresPlayerIE(SubtitlesInfoExtractor): for fmt in ['windows', 'android_tablet']: request = compat_urllib_request.Request( self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) - request.add_header('Youtubedl-user-agent', self._USER_AGENT) + request.add_header('User-Agent', self._USER_AGENT) fmt_json = self._download_json( request, video_id, 'Downloading %s video JSON' % fmt) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 14b814120..436cc5155 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor): # For some weird reason, blip.tv serves a video instead of subtitles # when we request with a common UA req = compat_urllib_request.Request(url) - req.add_header('Youtubedl-user-agent', 'youtube-dl') + req.add_header('User-Agent', 'youtube-dl') return self._download_webpage(req, None, note=False) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5ebc78033..22a726327 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor): webpage_url = self._MOBILE_TEMPLATE % mtvn_id req = compat_urllib_request.Request(webpage_url) # Otherwise we get a webpage that would execute some javascript - req.add_header('Youtubedl-user-agent', 'curl/7') + req.add_header('User-Agent', 'curl/7') webpage = self._download_webpage(req, mtvn_id, 'Downloading mobile page') metrics_url = unescapeHTML(self._search_regex(r' Date: Sat, 24 Jan 2015 18:19:58 +0100 Subject: [PATCH 033/327] [extractors] Use http_headers for setting the User-Agent and the Referer --- youtube_dl/downloader/http.py | 4 ---- youtube_dl/extractor/appletrailers.py | 4 +++- youtube_dl/extractor/common.py | 1 - youtube_dl/extractor/videomega.py | 4 +++- youtube_dl/extractor/wdr.py | 4 +++- 5 files changed, 9 insertions(+), 8 deletions(-) diff 
--git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 90a2e4c53..4db50ee90 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -24,10 +24,6 @@ class HttpFD(FileDownloader): # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} - if 'user_agent' in info_dict: - headers['User-agent'] = info_dict['user_agent'] - if 'http_referer' in info_dict: - headers['Referer'] = info_dict['http_referer'] add_headers = info_dict.get('http_headers') if add_headers: headers.update(add_headers) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 7cd0482c7..70621946d 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, 'uploader_id': uploader_id, - 'user_agent': 'QuickTime compatible (youtube-dl)', + 'http_headers': { + 'User-Agent': 'QuickTime compatible (youtube-dl)', + }, }) return { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 03f3f18c8..523400062 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,7 +108,6 @@ class InfoExtractor(object): (quality takes higher priority) -1 for default (order by other properties), -2 or smaller for less than default. - * http_referer HTTP Referer header value to set. * http_method HTTP method to use for the download. * http_headers A dictionary of additional HTTP headers to add to the request. diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index fc6e05fe0..273030316 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor): 'title': title, 'formats': formats, 'thumbnail': thumbnail, - 'http_referer': iframe_url, + 'http_headers': { + 'Referer': iframe_url, + }, } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 45466e31b..313b9c15d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, } From 587a9c27496979d983296944fee8d1fa589e1b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Jan 2015 18:25:09 +0100 Subject: [PATCH 034/327] [downloader/external] Use the 'http_headers' field --- youtube_dl/downloader/external.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 7ebe40096..5bf24ccbb 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -49,9 +49,9 @@ class ExternalFD(FileDownloader): def _calc_headers(self, info_dict): res = std_headers.copy() - ua = info_dict.get('user_agent') - if ua is not None: - res['User-Agent'] = ua + add_headers = info_dict.get('http_headers') + if add_headers: + res.update(add_headers) cookies = self._calc_cookies(info_dict) if cookies: From 8011fba3ae4301995f77c64ac63a4a467ef73b7e Mon Sep 17 00:00:00 2001 From: David-Development Date: Sat, 24 Jan 2015 18:28:16 +0100 Subject: [PATCH 035/327] [rtl2] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rtl2.py | 98 ++++++++++++++++++++++++++++++++ 2 files 
changed, 99 insertions(+) create mode 100644 youtube_dl/extractor/rtl2.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3f7ca6f7d..03c56156a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -349,6 +349,7 @@ from .rtbf import RTBFIE from .rte import RteIE from .rtlnl import RtlXlIE from .rtlnow import RTLnowIE +from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py new file mode 100644 index 000000000..2bdbf2ec2 --- /dev/null +++ b/youtube_dl/extractor/rtl2.py @@ -0,0 +1,98 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + unified_strdate, + int_or_none, +) + + +class RTL2IE(InfoExtractor): + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" + _VALID_URL = r'http?://(?P(?P(www\.)?rtl2\.de)/.*/(?P.*))' + _TEST = { + 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', + 'md5': 'dsadasdada', + 'info_dict': { + 'id': 'folge-203-0', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerk\xf6nig', + 'description' : 'Matthias, Det und Helge treten gegeneinander an.' + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + }, + #'params': { + # rtmp download + # 'skip_download': True, + #}, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_page_url = 'http://%s/' % mobj.group('domain') + video_id = mobj.group('video_id') + + webpage = self._download_webpage('http://' + mobj.group('url'), video_id) + + vico_id = self._html_search_regex(r'vico_id: ([0-9]+)', webpage, '%s'); + vivi_id = self._html_search_regex(r'vivi_id: ([0-9]+)', webpage, '%s'); + + info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id + webpage = self._download_webpage(info_url, '') + + video_info = json.loads(webpage.decode("latin1")) + print video_info + + + #self._download_webpage('http://cp108781.edgefcs.net/crossdomain.xml', '') + + download_url = video_info["video"]["streamurl"] # self._html_search_regex(r'streamurl\":\"(.*?)\"', webpage, '%s'); + title = video_info["video"]["titel"] # self._html_search_regex(r'titel\":\"(.*?)\"', webpage, '%s'); + description = video_info["video"]["beschreibung"] # self._html_search_regex(r'beschreibung\":\"(.*?)\"', webpage, '%s'); + #ext = self._html_search_regex(r'streamurl\":\".*?(\..{2,4})\"', webpage, '%s'); + + thumbnail = video_info["video"]["image"] + + download_url = download_url.replace("\\", "") + + stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, '%s'); + + #upload_date = self._html_search_regex(r'property=\"dc:date\".*?datatype=\"xsd:dateTime\".*?content=\"(.*?)\"', webpage, 'title') + #download_url += " -y " + stream_url + + #print stream_url + #print download_url + #print description + #print title + #print ext + + formats = [] + + fmt = { + 'url' : download_url, + #'app': 'ondemand?_fcs_vhost=cp108781.edgefcs.net', + 'play_path': stream_url, + #'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf', + #'page_url': 'http://www.cbsnews.com', + #'ext': ext, + } + + 
formats.append(fmt) + + + return { + 'id': video_id, + 'title': title, + 'thumbnail' : thumbnail, + 'description' : description, + 'formats': formats, + } From e5660ee6aebacee4b149648bee1a7ce1ec72c1e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Jan 2015 18:52:26 +0100 Subject: [PATCH 036/327] [YoutubeDL] Fill the info dict 'http_headers' field with all the headers available Useful for external tools using the json output. The methods '_calc_headers' and '_calc_cookies' have been copied from the downloader/external, now they just use "info_dict['http_headers']". --- youtube_dl/YoutubeDL.py | 36 ++++++++++++++++++++++++++ youtube_dl/downloader/external.py | 43 +++---------------------------- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 54e732943..4c3d45f48 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -56,6 +56,7 @@ from .utils import ( preferredencoding, SameFileError, sanitize_filename, + std_headers, subtitles_filename, takewhile_inclusive, UnavailableVideoError, @@ -865,6 +866,36 @@ class YoutubeDL(object): return matches[-1] return None + def _calc_headers(self, info_dict): + res = std_headers.copy() + + add_headers = info_dict.get('http_headers') + if add_headers: + res.update(add_headers) + + cookies = self._calc_cookies(info_dict) + if cookies: + res['Cookie'] = cookies + + return res + + def _calc_cookies(self, info_dict): + class _PseudoRequest(object): + def __init__(self, url): + self.url = url + self.headers = {} + self.unverifiable = False + + def add_unredirected_header(self, k, v): + self.headers[k] = v + + def get_full_url(self): + return self.url + + pr = _PseudoRequest(info_dict['url']) + self.cookiejar.add_cookie_header(pr) + return pr.headers.get('Cookie') + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -933,6 +964,11 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Add HTTP headers, so that external programs can use them from the + # json output + full_format_info = info_dict.copy() + full_format_info.update(format) + format['http_headers'] = self._calc_headers(full_format_info) format_limit = self.params.get('format_limit', None) if format_limit: diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 5bf24ccbb..af9fdba75 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -7,7 +7,6 @@ import sys from .common import FileDownloader from ..utils import ( encodeFilename, - std_headers, ) @@ -46,42 +45,6 @@ class ExternalFD(FileDownloader): def supports(cls, info_dict): return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') - def _calc_headers(self, info_dict): - res = std_headers.copy() - - add_headers = info_dict.get('http_headers') - if add_headers: - res.update(add_headers) - - cookies = self._calc_cookies(info_dict) - if cookies: - res['Cookie'] = cookies - - return res - - def _calc_cookies(self, info_dict): - class _PseudoRequest(object): - def __init__(self, url): - self.url = url - self.headers = {} - self.unverifiable = False - - def add_unredirected_header(self, k, v): - self.headers[k] = v - - def get_full_url(self): - return self.url - - def is_unverifiable(self): - return self.unverifiable - - def has_header(self, h): - return h in self.headers - - pr = 
_PseudoRequest(info_dict['url']) - self.ydl.cookiejar.add_cookie_header(pr) - return pr.headers.get('Cookie') - def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = self._make_cmd(tmpfilename, info_dict) @@ -107,7 +70,7 @@ class ExternalFD(FileDownloader): class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] - for key, val in self._calc_headers(info_dict).items(): + for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--', info_dict['url']] return cmd @@ -116,7 +79,7 @@ class CurlFD(ExternalFD): class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] - for key, val in self._calc_headers(info_dict).items(): + for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--', info_dict['url']] return cmd @@ -131,7 +94,7 @@ class Aria2cFD(ExternalFD): if dn: cmd += ['--dir', dn] cmd += ['--out', os.path.basename(tmpfilename)] - for key, val in self._calc_headers(info_dict).items(): + for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--', info_dict['url']] return cmd From 4b405cfc6e4f9184567b94ee2f8a4a9851a912b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Jan 2015 20:05:35 +0100 Subject: [PATCH 037/327] [YoutubeDL._calc_cookies] Restore the 'has_header' method I didn't copied it from downloader/external --- youtube_dl/YoutubeDL.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4c3d45f48..458fd15ea 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -892,6 +892,9 @@ class YoutubeDL(object): def get_full_url(self): return self.url + def has_header(self, h): + return h in self.headers + pr = _PseudoRequest(info_dict['url']) self.cookiejar.add_cookie_header(pr) return pr.headers.get('Cookie') From 1070711d6003e6750b8cf803c3926b2e273a9e85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 24 Jan 2015 20:12:47 +0100 Subject: [PATCH 038/327] [YoutubeDL._calc_cookies] Restore the 'is_unverifiable' I should have check everything was copied before commiting 4b405cfc6e4f9184567b94ee2f8a4a9851a912b5. 
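For reference, the hand-rolled stub only has to mimic the parts of urllib's Request interface that the cookie jar actually touches, which is why the missing is_unverifiable()/has_header() methods mattered. The same value can also be obtained by handing the jar a real request object; a rough sketch, not part of this series (the example URL and the bare YoutubeDL instance are illustrative only):

    import youtube_dl

    try:
        import urllib.request as urllib_request  # Python 3
    except ImportError:
        import urllib2 as urllib_request  # Python 2

    ydl = youtube_dl.YoutubeDL({})
    req = urllib_request.Request('http://example.com/video.mp4')
    ydl.cookiejar.add_cookie_header(req)
    # Same value _calc_cookies() computes (None if no cookies match the URL)
    print(req.get_header('Cookie'))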
--- youtube_dl/YoutubeDL.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 458fd15ea..d6728b2dd 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -892,6 +892,9 @@ class YoutubeDL(object): def get_full_url(self): return self.url + def is_unverifiable(self): + return self.unverifiable + def has_header(self, h): return h in self.headers From 7906d199a11c0f8deccc0f87d7ee850106c361dc Mon Sep 17 00:00:00 2001 From: David-Development Date: Sat, 24 Jan 2015 18:28:16 +0100 Subject: [PATCH 039/327] [rtl2] Add new extractor --- youtube_dl/downloader/rtmp.py | 8 ++- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rtl2.py | 100 +++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 youtube_dl/extractor/rtl2.py diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 6dbbc053c..6f7ad588e 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -104,7 +104,8 @@ class RtmpFD(FileDownloader): live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn', None) protocol = info_dict.get('rtmp_protocol', None) - + no_resume = info_dict.get('no_resume', False) + self.report_destination(filename) tmpfilename = self.temp_name(filename) test = self.params.get('test', False) @@ -141,7 +142,10 @@ class RtmpFD(FileDownloader): basic_args += ['--conn', conn] if protocol is not None: basic_args += ['--protocol', protocol] - args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] + if not no_resume: + basic_args += ['--resume'] + + args = basic_args + [[], ['--skip', '1']][not live and self.params.get('continuedl', False)] if sys.platform == 'win32' and sys.version_info < (3, 0): # Windows subprocess module does not actually support Unicode diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3f7ca6f7d..03c56156a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -349,6 +349,7 @@ from .rtbf import RTBFIE from .rte import RteIE from .rtlnl import RtlXlIE from .rtlnow import RTLnowIE +from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py new file mode 100644 index 000000000..14b45e86e --- /dev/null +++ b/youtube_dl/extractor/rtl2.py @@ -0,0 +1,100 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + unified_strdate, + int_or_none, +) + + +class RTL2IE(InfoExtractor): + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" + _VALID_URL = r'http?://(?P(?P(www\.)?rtl2\.de)/.*/(?P.*))' + _TESTS = [{ + 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', + 'info_dict': { + 'id': 'folge-203-0', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerk\xf6nig', + 'description' : 'Matthias, Det und Helge treten gegeneinander an.' + }, + 'params': { + # rtmp download + #'skip_download': True, + }, + }, + { + 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', + 'info_dict': { + 'id': '21040-anna-erwischt-alex', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerk\xf6nig', + 'description' : 'Matthias, Det und Helge treten gegeneinander an.' 
+ }, + 'params': { + # rtmp download + #'skip_download': True, + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_page_url = 'http://%s/' % mobj.group('domain') + video_id = mobj.group('video_id') + + webpage = self._download_webpage('http://' + mobj.group('url'), video_id) + + vico_id = self._html_search_regex(r'vico_id\s*:\s*([0-9]+)', webpage, '%s'); + vivi_id = self._html_search_regex(r'vivi_id\s*:\s*([0-9]+)', webpage, '%s'); + + info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id + webpage = self._download_webpage(info_url, '') + + video_info = json.loads(webpage) + + download_url = video_info["video"]["streamurl"] # self._html_search_regex(r'streamurl\":\"(.*?)\"', webpage, '%s'); + title = video_info["video"]["titel"] # self._html_search_regex(r'titel\":\"(.*?)\"', webpage, '%s'); + description = video_info["video"]["beschreibung"] # self._html_search_regex(r'beschreibung\":\"(.*?)\"', webpage, '%s'); + #ext = self._html_search_regex(r'streamurl\":\".*?(\..{2,4})\"', webpage, '%s'); + + thumbnail = video_info["video"]["image"] + + download_url = download_url.replace("\\", "") + + stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, '%s') + + #print(download_url) + #print(stream_url) + #print(title) + #print(description) + #print(video_id) + + formats = [] + + fmt = { + 'url' : download_url, + #'app': 'ondemand?_fcs_vhost=cp108781.edgefcs.net', + 'play_path': stream_url, + 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', + 'page_url': url, + 'flash_version' : "LNX 11,2,202,429", + 'rtmp_conn' : ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"], + 'no_resume' : 1, + } + + formats.append(fmt) + + return { + 'id': video_id, + 'title': title, + 'thumbnail' : thumbnail, + 'description' : description, + 'formats': formats, + } From 1e108029907ca28b75f37d2cf0bf25bcabbfbdac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Jan 2015 05:21:39 +0600 Subject: [PATCH 040/327] [krasview] Fix extraction --- youtube_dl/extractor/krasview.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index 6f3d2345b..e46954b47 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -2,18 +2,17 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor from ..utils import ( int_or_none, - unescapeHTML, + js_to_json, ) class KrasViewIE(InfoExtractor): IE_DESC = 'Красвью' - _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)' + _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)' _TEST = { 'url': 'http://krasview.ru/video/512228', @@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - flashvars = json.loads(self._search_regex( - r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars')) + flashvars = json.loads(js_to_json(self._search_regex( + r'video_Init\(({.+?})', webpage, 'flashvars'))) video_url = flashvars['url'] - title = unescapeHTML(flashvars['title']) - description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None)) - thumbnail = flashvars['image'] - duration = int(flashvars['duration']) - filesize = 
int(flashvars['size']) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage, default=None) + thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage) + duration = int_or_none(flashvars.get('duration')) width = int_or_none(self._og_search_property('video:width', webpage, 'video width')) height = int_or_none(self._og_search_property('video:height', webpage, 'video height')) @@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': duration, - 'filesize': filesize, 'width': width, 'height': height, } From cfb56d1af38f3e1e0251dbd8a20e3ed8884976ff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 02:38:47 +0100 Subject: [PATCH 041/327] Add --list-thumbnails --- test/test_utils.py | 11 ++++++++++ youtube_dl/YoutubeDL.py | 37 ++++++++++++++++++++++++++++---- youtube_dl/__init__.py | 1 + youtube_dl/extractor/common.py | 2 ++ youtube_dl/extractor/testtube.py | 16 ++++++++++++-- youtube_dl/options.py | 15 +++++++++---- youtube_dl/utils.py | 8 +++++++ 7 files changed, 80 insertions(+), 10 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index bdd7f268a..ebec7986f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -52,6 +52,7 @@ from youtube_dl.utils import ( urlencode_postdata, version_tuple, xpath_with_ns, + render_table, ) @@ -434,5 +435,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertTrue(is_html( # UTF-32-LE b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) + def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]]), + 'a bcd\n' + '123 4\n' + '9999 51') + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d6728b2dd..e0f5a0d74 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -54,6 +54,7 @@ from .utils import ( PostProcessingError, platform_name, preferredencoding, + render_table, SameFileError, sanitize_filename, std_headers, @@ -221,6 +222,8 @@ class YoutubeDL(object): youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. external_downloader: Executable of the external downloader to call. + listformats: Print an overview of available video formats and exit. + list_thumbnails: Print a table of all thumbnails and exit. 
The following parameters are not used by YoutubeDL itself, they are used by @@ -916,9 +919,14 @@ class YoutubeDL(object): info_dict['playlist_index'] = None thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('width'), t.get('height'), t.get('url'))) + t.get('preference'), t.get('width'), t.get('height'), + t.get('id'), t.get('url'))) for t in thumbnails: if 'width' in t and 'height' in t: t['resolution'] = '%dx%d' % (t['width'], t['height']) @@ -990,9 +998,12 @@ class YoutubeDL(object): # element in the 'formats' field in info_dict is info_dict itself, # wich can't be exported to json info_dict['formats'] = formats - if self.params.get('listformats', None): + if self.params.get('listformats'): self.list_formats(info_dict) return + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return req_format = self.params.get('format') if req_format is None: @@ -1500,8 +1511,26 @@ class YoutubeDL(object): header_line = line({ 'format_id': 'format code', 'ext': 'extension', 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) - self.to_screen('[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + self.to_screen( + '[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, '\n'.join(formats_s))) + + def list_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if not thumbnails: + tn_url = info_dict.get('thumbnail') + if tn_url: + thumbnails = [{'id': '0', 'url': tn_url}] + else: + self.to_screen( + '[info] No thumbnails present for %s' % info_dict['id']) + return + + self.to_screen( + '[info] Thumbnails for %s:' % info_dict['id']) + self.to_screen(render_table( + ['ID', 'width', 'height', 'URL'], + [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) def urlopen(self, req): """ Start an HTTP download """ diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3fc7dc5c2..a3f82612c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -331,6 +331,7 @@ def _real_main(argv=None): 'call_home': opts.call_home, 'sleep_interval': opts.sleep_interval, 'external_downloader': opts.external_downloader, + 'list_thumbnails': opts.list_thumbnails, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 523400062..7b7a832dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -129,7 +129,9 @@ class InfoExtractor(object): something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID * "url" + * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) * "resolution" (optional, string "{width}x{height"}, diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py index fd47e71a2..6a7b5e49d 100644 --- a/youtube_dl/extractor/testtube.py +++ b/youtube_dl/extractor/testtube.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + qualities, +) class TestTubeIE(InfoExtractor): @@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor): self._sort_formats(formats) 
duration = int_or_none(info.get('duration')) + images = info.get('images') + thumbnails = None + preference = qualities(['mini', 'small', 'medium', 'large']) + if images: + thumbnails = [{ + 'id': thumbnail_id, + 'url': img_url, + 'preference': preference(thumbnail_id) + } for thumbnail_id, img_url in images.items()] return { 'id': video_id, 'display_id': display_id, 'title': info['title'], 'description': info.get('summary'), - 'thumbnail': info.get('images', {}).get('large'), + 'thumbnails': thumbnails, 'uploader': info.get('show', {}).get('name'), 'uploader_id': info.get('show', {}).get('slug'), 'duration': duration, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b38b8349f..e3b4b8a8a 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -614,10 +614,6 @@ def parseOpts(overrideArguments=None): '--write-annotations', action='store_true', dest='writeannotations', default=False, help='write video annotations to a .annotation file') - filesystem.add_option( - '--write-thumbnail', - action='store_true', dest='writethumbnail', default=False, - help='write thumbnail image to disk') filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', @@ -637,6 +633,16 @@ def parseOpts(overrideArguments=None): action='store_true', dest='rm_cachedir', help='Delete all filesystem cache files') + thumbnail = optparse.OptionGroup(parser, 'Thumbnail images') + thumbnail.add_option( + '--write-thumbnail', + action='store_true', dest='writethumbnail', default=False, + help='write thumbnail image to disk') + thumbnail.add_option( + '--list-thumbnails', + action='store_true', dest='list_thumbnails', default=False, + help='Simulate and list all available thumbnail formats') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') postproc.add_option( '-x', '--extract-audio', @@ -702,6 +708,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(selection) parser.add_option_group(downloader) parser.add_option_group(filesystem) + parser.add_option_group(thumbnail) parser.add_option_group(verbosity) parser.add_option_group(workarounds) parser.add_option_group(video_format) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d22b03134..b8c52af74 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1659,3 +1659,11 @@ def determine_protocol(info_dict): return 'f4m' return compat_urllib_parse_urlparse(url).scheme + + +def render_table(header_row, data): + """ Render a list of rows, each as a list of values """ + table = [header_row] + data + max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] + format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' + return '\n'.join(format_str % tuple(row) for row in table) From ec82d85acdc497436e7e2e767088d3ecb5947f68 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 03:11:12 +0100 Subject: [PATCH 042/327] [YoutubeDL] Implement --write-all-thumbnails (Closes #2269) --- youtube_dl/YoutubeDL.py | 57 +++++++++++++++++++++++++++-------------- youtube_dl/__init__.py | 1 + youtube_dl/options.py | 4 +++ 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e0f5a0d74..0e73dc8ff 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -146,6 +146,7 @@ class YoutubeDL(object): writeinfojson: Write the video description to a .info.json file writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image 
to a file + write_all_thumbnails: Write all thumbnail formats to files writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video @@ -1210,25 +1211,7 @@ class YoutubeDL(object): self.report_error('Cannot write metadata to JSON file ' + infofn) return - if self.params.get('writethumbnail', False): - if info_dict.get('thumbnail') is not None: - thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') - thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen('[%s] %s: Thumbnail is already present' % - (info_dict['extractor'], info_dict['id'])) - else: - self.to_screen('[%s] %s: Downloading thumbnail ...' % - (info_dict['extractor'], info_dict['id'])) - try: - uf = self.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen('[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning('Unable to download thumbnail "%s": %s' % - (info_dict['thumbnail'], compat_str(err))) + self._write_thumbnails(info_dict, filename) if not self.params.get('skip_download', False): try: @@ -1676,3 +1659,39 @@ class YoutubeDL(object): if encoding is None: encoding = preferredencoding() return encoding + + def _write_thumbnails(self, info_dict, filename): + if self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') + if thumbnails: + thumbnails = [thumbnails[-1]] + elif self.params.get('write_all_thumbnails', False): + thumbnails = info_dict.get('thumbnails') + else: + return + + if not thumbnails: + # No thumbnails present, so return immediately + return + + for t in thumbnails: + thumb_ext = determine_ext(t['url'], 'jpg') + suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' + thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' + thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): + self.to_screen('[%s] %s: Thumbnail %sis already present' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + else: + self.to_screen('[%s] %s: Downloading thumbnail %s...' 
% + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + try: + uf = self.urlopen(t['url']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % + (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning('Unable to download thumbnail "%s": %s' % + (t['url'], compat_str(err))) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a3f82612c..04f668334 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -286,6 +286,7 @@ def _real_main(argv=None): 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'writethumbnail': opts.writethumbnail, + 'write_all_thumbnails': opts.write_all_thumbnails, 'writesubtitles': opts.writesubtitles, 'writeautomaticsub': opts.writeautomaticsub, 'allsubtitles': opts.allsubtitles, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index e3b4b8a8a..a3b012ddb 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -638,6 +638,10 @@ def parseOpts(overrideArguments=None): '--write-thumbnail', action='store_true', dest='writethumbnail', default=False, help='write thumbnail image to disk') + thumbnail.add_option( + '--write-all-thumbnails', + action='store_true', dest='write_all_thumbnails', default=False, + help='write all thumbnail image formats to disk') thumbnail.add_option( '--list-thumbnails', action='store_true', dest='list_thumbnails', default=False, From c14e88f0f561c5ac0a1cb9e6764fe4702bd9f7ca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 04:24:55 +0100 Subject: [PATCH 043/327] [YoutubeDL] Add --playlist-items option (Fixes #2662) --- AUTHORS | 1 + youtube_dl/YoutubeDL.py | 38 +++++++++++++++++++++++++++++++++----- youtube_dl/__init__.py | 1 + youtube_dl/options.py | 4 ++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/AUTHORS b/AUTHORS index b8bf3cb6f..8362b6d8a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -104,3 +104,4 @@ Ondřej Caletka Dinesh S Johan K. Jensen Yen Chi Hsuan +Enam Mijbah Noor diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0e73dc8ff..b7e93b8dd 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -137,6 +137,7 @@ class YoutubeDL(object): nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. 
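The playlist_items spec accepted in the hunk below is a comma-separated list of 1-based indices and inclusive ranges. A standalone sketch of the same expansion, for illustration only (the function name is made up; the real code that follows streams the items lazily and does no validation):

    def expand_playlist_items(spec):
        # '1-3,7,10-13' -> [1, 2, 3, 7, 10, 11, 12, 13]
        items = []
        for segment in spec.split(','):
            if '-' in segment:
                start, end = segment.split('-')
                items.extend(range(int(start), int(end) + 1))
            else:
                items.append(int(segment))
        return items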
@@ -703,24 +704,51 @@ class YoutubeDL(object): if playlistend == -1: playlistend = None + playlistitems_str = self.params.get('playlist_items', None) + playlistitems = None + if playlistitems_str is not None: + def iter_playlistitems(format): + for string_segment in format.split(','): + if '-' in string_segment: + start, end = string_segment.split('-') + for item in range(int(start), int(end) + 1): + yield int(item) + else: + yield int(string_segment) + playlistitems = iter_playlistitems(playlistitems_str) + ie_entries = ie_result['entries'] if isinstance(ie_entries, list): n_all_entries = len(ie_entries) - entries = ie_entries[playliststart:playlistend] + if playlistitems: + entries = [ie_entries[i - 1] for i in playlistitems] + else: + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): - entries = ie_entries.getslice( - playliststart, playlistend) + if playlistitems: + entries = [] + for item in playlistitems: + entries.extend(ie_entries.getslice( + item - 1, item + )) + else: + entries = ie_entries.getslice( + playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) else: # iterable - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) + if playlistitems: + entry_list = list(ie_entries) + entries = [entry_list[i - 1] for i in playlistitems] + else: + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 04f668334..0bd7b68c3 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -333,6 +333,7 @@ def _real_main(argv=None): 'sleep_interval': opts.sleep_interval, 'external_downloader': opts.external_downloader, 'list_thumbnails': opts.list_thumbnails, + 'playlist_items': opts.playlist_items, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index a3b012ddb..872835295 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -200,6 +200,10 @@ def parseOpts(overrideArguments=None): '--playlist-end', dest='playlistend', metavar='NUMBER', default=None, type=int, help='playlist video to end at (default is last)') + selection.add_option( + '--playlist-items', + dest='playlist_items', metavar='ITEM_SPEC', default=None, + help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. 
You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', From baeaeffce550b23848983cd281f173a7906cd7f9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 04:34:38 +0100 Subject: [PATCH 044/327] [options] Add support for infinite retries (Fixes #507) --- youtube_dl/__init__.py | 13 ++++++++----- youtube_dl/options.py | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0bd7b68c3..09da8802d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -143,10 +143,13 @@ def _real_main(argv=None): parser.error('invalid max_filesize specified') opts.max_filesize = numeric_limit if opts.retries is not None: - try: - opts.retries = int(opts.retries) - except (TypeError, ValueError): - parser.error('invalid retry count specified') + if opts.retries in ('inf', 'infinite'): + opts_retries = float('inf') + else: + try: + opts_retries = int(opts.retries) + except (TypeError, ValueError): + parser.error('invalid retry count specified') if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -268,7 +271,7 @@ def _real_main(argv=None): 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, + 'retries': opts_retries, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 872835295..1ddbdbc78 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -377,7 +377,7 @@ def parseOpts(overrideArguments=None): downloader.add_option( '-R', '--retries', dest='retries', metavar='RETRIES', default=10, - help='number of retries (default is %default)') + help='number of retries (default is %default), or "infinite".') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', From 881e6a1f5c47a65348879f817ad833081e8c5ada Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 04:49:44 +0100 Subject: [PATCH 045/327] Add --xattr-set-filesize option (Fixes #1348) --- youtube_dl/YoutubeDL.py | 3 ++- youtube_dl/__init__.py | 6 ++++++ youtube_dl/downloader/common.py | 32 +++++++++++++++++--------------- youtube_dl/downloader/http.py | 8 ++++++++ youtube_dl/options.py | 4 ++++ 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b7e93b8dd..0241f7e3c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -231,7 +231,8 @@ class YoutubeDL(object): The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, - noresizebuffer, retries, continuedl, noprogress, consoletitle + noresizebuffer, retries, continuedl, noprogress, consoletitle, + xattr_set_filesize. 
The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 09da8802d..112a8ba60 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -241,6 +241,11 @@ def _real_main(argv=None): 'verboseOutput': opts.verbose, 'exec_cmd': opts.exec_cmd, }) + if opts.xattr_set_filesize: + try: + import xattr + except ImportError: + parser.error('setting filesize xattr requested but python-xattr is not available') ydl_opts = { 'usenetrc': opts.usenetrc, diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index c35c42c1d..7bb3a948d 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -25,21 +25,23 @@ class FileDownloader(object): Available options: - verbose: Print additional info to stdout. - quiet: Do not print messages to stdout. - ratelimit: Download speed limit, in bytes/sec. - retries: Number of times to retry for HTTP error 5xx - buffersize: Size of download buffer in bytes. - noresizebuffer: Do not automatically resize the download buffer. - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - test: Download only first bytes to test the downloader. - min_filesize: Skip files smaller than this size - max_filesize: Skip files larger than this size + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. + (experimental) Subclasses of this one must re-define the real_download method. 
""" diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 4db50ee90..8a1d578d5 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -157,6 +157,14 @@ class HttpFD(FileDownloader): except (OSError, IOError) as err: self.report_error('unable to open for writing: %s' % str(err)) return False + + if self.params.get('xattr_set_filesize', False) and data_len is not None: + try: + import xattr + xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + except(OSError, IOError, ImportError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) + try: stream.write(data_block) except (IOError, OSError) as err: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 1ddbdbc78..dbc6f5528 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -394,6 +394,10 @@ def parseOpts(overrideArguments=None): '--playlist-reverse', action='store_true', help='Download playlist videos in reverse order') + downloader.add_option( + '--xattr-set-filesize', + dest='xattr_set_filesize', action='store_true', + help='(experimental) set file xattribute ytdl.filesize with expected filesize') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', From e1ccc04e9f68988df0520b7502edc6479b62378f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 04:56:32 +0100 Subject: [PATCH 046/327] Test rtmpdump on travis (Fixes #1601) --- .travis.yml | 3 +++ youtube_dl/extractor/folketinget.py | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index f14014414..fb34299fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,9 @@ python: - "2.7" - "3.3" - "3.4" +before_install: + - sudo apt-get update -qq + - sudo apt-get install -yqq rtmpdump script: nosetests test --verbose notifications: email: diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 68e2db943..0fb29de75 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P[0-9]+)\.aspx' _TEST = { 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player', + 'md5': '6269e8626fa1a891bf5369b386ae996a', 'info_dict': { 'id': '1165642', 'ext': 'mp4', @@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor): 'upload_date': '20141120', 'duration': 3960, }, - 'params': { - 'skip_download': 'rtmpdump required', - } } def _real_extract(self, url): From 7d346331b53498b5966790677e0b41631edd8a4e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 05:15:47 +0100 Subject: [PATCH 047/327] [audiomack:album] Update testcase --- test/helper.py | 2 +- youtube_dl/extractor/audiomack.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/test/helper.py b/test/helper.py index c416f388c..27a68091f 100644 --- a/test/helper.py +++ b/test/helper.py @@ -140,7 +140,7 @@ def expect_info_dict(self, got_dict, expected_dict): # Are checkable fields missing from the test case definition? 
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) for key, value in got_dict.items() - if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) + if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: def _repr(v): diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 8bfe50214..693ba22c6 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor): # Album playlist ripped from fakeshoredrive with no metadata { 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project', + 'info_dict': { + 'title': 'PPP (Pistol P Project)', + 'id': '837572', + }, 'playlist': [{ 'info_dict': { - 'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', - 'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', + 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)', + 'id': '837577', 'ext': 'mp3', + 'uploader': 'Lil Herb a.k.a. G Herbo', } }], 'params': { - 'playliststart': 8, - 'playlistend': 8, + 'playliststart': 9, + 'playlistend': 9, } } ] From 37f4ce538ab572812465fa6d0e429bf767a030fd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 05:17:15 +0100 Subject: [PATCH 048/327] [smotri] Fix test case --- youtube_dl/extractor/smotri.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 26f361c93..e94f41362 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -102,6 +102,7 @@ class SmotriIE(InfoExtractor): 'uploader_id': 'mopeder', 'duration': 71, 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg', + 'upload_date': '20150114', }, }, # swf player From cc1237f4845e108017687d6727c3dfe6efc3cb4d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 05:17:38 +0100 Subject: [PATCH 049/327] [__init__] Work around flake8 false positive --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 112a8ba60..71d2c6f35 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -244,6 +244,7 @@ def _real_main(argv=None): if opts.xattr_set_filesize: try: import xattr + xattr # Confuse flake8 except ImportError: parser.error('setting filesize xattr requested but python-xattr is not available') From 8604e882a8f99fcd632efbb94e449477d860218d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 05:23:21 +0100 Subject: [PATCH 050/327] [ubu] Fix test and modernize --- youtube_dl/extractor/ubu.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py index 0182d67ec..d50237758 100644 --- a/youtube_dl/extractor/ubu.py +++ b/youtube_dl/extractor/ubu.py @@ -3,50 +3,51 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + qualities, +) class UbuIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P[\da-z_-]+)\.html' _TEST = { 'url': 'http://ubu.com/film/her_noise.html', - 'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9', + 'md5': 
'138d5652618bf0f03878978db9bef1ee', 'info_dict': { 'id': 'her_noise', - 'ext': 'mp4', + 'ext': 'm4v', 'title': 'Her Noise - The Making Of (2007)', 'duration': 3600, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex( r'.+?Film & Video: ([^<]+)', webpage, 'title') duration = int_or_none(self._html_search_regex( - r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None)) - if duration: - duration *= 60 + r'Duration: (\d+) minutes', webpage, 'duration', fatal=False), + invscale=60) formats = [] - FORMAT_REGEXES = [ - ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"], - ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'] + ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"), + ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'), ] - + preference = qualities([fid for fid, _ in FORMAT_REGEXES]) for format_id, format_regex in FORMAT_REGEXES: m = re.search(format_regex, webpage) if m: formats.append({ 'url': m.group(1), 'format_id': format_id, + 'preference': preference(format_id), }) + self._sort_formats(formats) return { 'id': video_id, From ee69b99af6d00202a5ad8510604c807876d1b7b1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 06:15:51 +0100 Subject: [PATCH 051/327] [YoutubeDL] clarify hook documentation --- youtube_dl/YoutubeDL.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0241f7e3c..b772f87f1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -198,11 +198,12 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * filename: The final filename - * status: One of "downloading" and "finished" - - The dict may also have some of the following entries: + * status: One of "downloading" and "finished". + Check this first and ignore unknown values. 
+ If status is one of "downloading" or "finished", the + following properties may also be present: + * filename: The final filename (always present) * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown * tmpfilename: The filename we're currently writing to @@ -1251,6 +1252,7 @@ class YoutubeDL(object): if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) + if info_dict.get('requested_formats') is not None: downloaded = [] success = True From fe41ddbb285abccc3c4d7a3ebc1238c13ec72577 Mon Sep 17 00:00:00 2001 From: David Development Date: Sun, 25 Jan 2015 11:53:53 +0100 Subject: [PATCH 052/327] refactoring - bug fixes --- youtube_dl/extractor/rtl2.py | 66 ++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 14b45e86e..7086d698f 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -14,82 +13,83 @@ from ..utils import ( class RTL2IE(InfoExtractor): - """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" - _VALID_URL = r'http?://(?P(?P(www\.)?rtl2\.de)/.*/(?P.*))' + """Information Extractor for RTL2""" + _VALID_URL = r'http?://(?P(?P(www\.)?rtl2\.de)/.*/(?P.*))/' _TESTS = [{ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', 'info_dict': { 'id': 'folge-203-0', 'ext': 'f4v', - 'title': 'GRIP sucht den Sommerk\xf6nig', + 'title': 'GRIP sucht den Sommerkönig', 'description' : 'Matthias, Det und Helge treten gegeneinander an.' }, 'params': { # rtmp download - #'skip_download': True, + 'skip_download': True, }, }, { 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', 'info_dict': { 'id': '21040-anna-erwischt-alex', - 'ext': 'f4v', - 'title': 'GRIP sucht den Sommerk\xf6nig', - 'description' : 'Matthias, Det und Helge treten gegeneinander an.' + 'ext': 'mp4', + 'title': 'Anna erwischt Alex!', + 'description' : 'Anna ist Alex\' Tochter bei Köln 50667.' }, 'params': { # rtmp download - #'skip_download': True, + 'skip_download': True, }, }, ] def _real_extract(self, url): + + #Some rtl2 urls have no slash at the end, so append it. 
+ if not url.endswith("/"): + url += '/' + mobj = re.match(self._VALID_URL, url) - video_page_url = 'http://%s/' % mobj.group('domain') video_id = mobj.group('video_id') - - webpage = self._download_webpage('http://' + mobj.group('url'), video_id) - vico_id = self._html_search_regex(r'vico_id\s*:\s*([0-9]+)', webpage, '%s'); - vivi_id = self._html_search_regex(r'vivi_id\s*:\s*([0-9]+)', webpage, '%s'); + webpage = self._download_webpage(url, video_id) + + vico_id = self._html_search_regex(r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id not found'); + vivi_id = self._html_search_regex(r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id not found'); info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id webpage = self._download_webpage(info_url, '') - video_info = json.loads(webpage) + video_info = self._download_json(info_url, video_id) - download_url = video_info["video"]["streamurl"] # self._html_search_regex(r'streamurl\":\"(.*?)\"', webpage, '%s'); - title = video_info["video"]["titel"] # self._html_search_regex(r'titel\":\"(.*?)\"', webpage, '%s'); - description = video_info["video"]["beschreibung"] # self._html_search_regex(r'beschreibung\":\"(.*?)\"', webpage, '%s'); - #ext = self._html_search_regex(r'streamurl\":\".*?(\..{2,4})\"', webpage, '%s'); + download_url = video_info["video"]["streamurl"] + title = video_info["video"]["titel"] + description = video_info["video"]["beschreibung"] thumbnail = video_info["video"]["image"] download_url = download_url.replace("\\", "") stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, '%s') - - #print(download_url) - #print(stream_url) - #print(title) - #print(description) - #print(video_id) - formats = [] - - fmt = { - 'url' : download_url, + #Debug output + #print('URL: ' + url) + #print('DL URL: ' + download_url) + #print('Stream URL: ' + stream_url) + #print('Title: ' + title) + #print('Description: '+ description) + #print('Video ID: ' + video_id) + + formats = [{ + 'url' : download_url, #'app': 'ondemand?_fcs_vhost=cp108781.edgefcs.net', 'play_path': stream_url, 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', 'page_url': url, 'flash_version' : "LNX 11,2,202,429", 'rtmp_conn' : ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"], - 'no_resume' : 1, - } - - formats.append(fmt) + 'no_resume' : True, + }] return { 'id': video_id, From c9326b38b824eb8babbf2306b3bfe4bd5e683cfe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 18:09:09 +0100 Subject: [PATCH 053/327] flake8: Ignore .git --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 13dcd8af6..02aa56257 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ universal = True [flake8] -exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git ignore = E501 From 3dee7826e7133fb4a73aa0aabaee78499e49264f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 18:09:48 +0100 Subject: [PATCH 054/327] [rtl2] PEP8, simplify, make rtmp tests run (#470) --- youtube_dl/downloader/rtmp.py | 13 ++-- youtube_dl/extractor/common.py | 7 +- youtube_dl/extractor/rtl2.py | 116 +++++++++++++-------------------- 3 files changed, 57 insertions(+), 79 deletions(-) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 6f7ad588e..e06ebe826 100644 --- 
a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -105,7 +105,8 @@ class RtmpFD(FileDownloader): conn = info_dict.get('rtmp_conn', None) protocol = info_dict.get('rtmp_protocol', None) no_resume = info_dict.get('no_resume', False) - + continue_dl = info_dict.get('continuedl', False) + self.report_destination(filename) tmpfilename = self.temp_name(filename) test = self.params.get('test', False) @@ -142,10 +143,12 @@ class RtmpFD(FileDownloader): basic_args += ['--conn', conn] if protocol is not None: basic_args += ['--protocol', protocol] - if not no_resume: - basic_args += ['--resume'] - - args = basic_args + [[], ['--skip', '1']][not live and self.params.get('continuedl', False)] + + args = basic_args + if not no_resume and continue_dl and not live: + args += ['--resume'] + if not live and continue_dl: + args += ['--skip', '1'] if sys.platform == 'win32' and sys.version_info < (3, 0): # Windows subprocess module does not actually support Unicode diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7b7a832dc..388c55e99 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -114,8 +114,11 @@ class InfoExtractor(object): * http_post_data Additional data to send with a POST request. * stretched_ratio If given and not 1, indicates that the - video's pixels are not square. - width : height ratio as float. + video's pixels are not square. + width : height ratio as float. + * no_resume The server does not support resuming the + (HTTP or RTMP) download. Boolean. + url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 7086d698f..72cd80498 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,100 +1,72 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, - unified_strdate, - int_or_none, -) class RTL2IE(InfoExtractor): - """Information Extractor for RTL2""" - _VALID_URL = r'http?://(?P(?P(www\.)?rtl2\.de)/.*/(?P.*))/' + _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P[^?#/]*?)(?:$|/(?:$|[?#]))' _TESTS = [{ - 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', - 'info_dict': { - 'id': 'folge-203-0', - 'ext': 'f4v', - 'title': 'GRIP sucht den Sommerkönig', - 'description' : 'Matthias, Det und Helge treten gegeneinander an.' - }, - 'params': { - # rtmp download - 'skip_download': True, - }, + 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', + 'md5': 'bfcc179030535b08dc2b36b469b5adc7', + 'info_dict': { + 'id': 'folge-203-0', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerkönig', + 'description': 'Matthias, Det und Helge treten gegeneinander an.' }, - { - 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', - 'info_dict': { - 'id': '21040-anna-erwischt-alex', - 'ext': 'mp4', - 'title': 'Anna erwischt Alex!', - 'description' : 'Anna ist Alex\' Tochter bei Köln 50667.' - }, - 'params': { - # rtmp download - 'skip_download': True, - }, + }, { + 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', + 'md5': 'ffcd517d2805b57ce11a58a2980c2b02', + 'info_dict': { + 'id': '21040-anna-erwischt-alex', + 'ext': 'mp4', + 'title': 'Anna erwischt Alex!', + 'description': 'Anna ist Alex\' Tochter bei Köln 50667.' 
}, - ] + }] def _real_extract(self, url): - - #Some rtl2 urls have no slash at the end, so append it. - if not url.endswith("/"): + # Some rtl2 urls have no slash at the end, so append it. + if not url.endswith('/'): url += '/' - - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - vico_id = self._html_search_regex(r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id not found'); - vivi_id = self._html_search_regex(r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id not found'); - + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id webpage = self._download_webpage(info_url, '') - video_info = self._download_json(info_url, video_id) + info = self._download_json(info_url, video_id) + video_info = info['video'] + title = video_info['titel'] + description = video_info.get('beschreibung') + thumbnail = video_info.get('image') - download_url = video_info["video"]["streamurl"] - title = video_info["video"]["titel"] - description = video_info["video"]["beschreibung"] + download_url = video_info['streamurl'] + download_url = download_url.replace('\\', '') + stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL') + rtmp_conn = ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"] - thumbnail = video_info["video"]["image"] - - download_url = download_url.replace("\\", "") - - stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, '%s') - - #Debug output - #print('URL: ' + url) - #print('DL URL: ' + download_url) - #print('Stream URL: ' + stream_url) - #print('Title: ' + title) - #print('Description: '+ description) - #print('Video ID: ' + video_id) - formats = [{ - 'url' : download_url, - #'app': 'ondemand?_fcs_vhost=cp108781.edgefcs.net', - 'play_path': stream_url, - 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', - 'page_url': url, - 'flash_version' : "LNX 11,2,202,429", - 'rtmp_conn' : ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"], - 'no_resume' : True, - }] + 'url': download_url, + 'play_path': stream_url, + 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf', + 'page_url': url, + 'flash_version': 'LNX 11,2,202,429', + 'rtmp_conn': rtmp_conn, + 'no_resume': True, + }] + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'thumbnail' : thumbnail, - 'description' : description, + 'thumbnail': thumbnail, + 'description': description, 'formats': formats, } From b1b0b1ca3030a2b04dd94ec28d5ece3fda88a282 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 18:14:59 +0100 Subject: [PATCH 055/327] [generic] Improve description testcase in rss test --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a028c4ed4..ad16b8330 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -362,7 +362,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', 'title': 'Zero Punctuation', - 'description': 're:' + 'description': 're:.*groundbreaking video review series.*' }, 'playlist_mincount': 11, }, From 
6d2749aac407df1e039f5b61a294991c1e810cff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 25 Jan 2015 18:56:04 +0100 Subject: [PATCH 056/327] [drtv] Prefer the version without spoken subtitles (fixes #4779) For example for http://www.dr.dk/tv/se/moderne-klassikere/moderne-klassikere-one-republic-apologize#!/, there's a version where everytime someone speaks in English a computer voice translates it. --- youtube_dl/extractor/drtv.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c44adb109..510ef04b0 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -48,14 +48,20 @@ class DRTVIE(SubtitlesInfoExtractor): elif asset['Kind'] == 'VideoResource': duration = asset['DurationInMilliseconds'] / 1000.0 restricted_to_denmark = asset['RestrictedToDenmark'] + spoken_subtitles = asset['Target'] == 'SpokenSubtitles' for link in asset['Links']: target = link['Target'] uri = link['Uri'] + format_id = target + preference = -1 if target == 'HDS' else -2 + if spoken_subtitles: + preference -= 2 + format_id += '-spoken-subtitles' formats.append({ 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, - 'format_id': target, + 'format_id': format_id, 'ext': link['FileFormat'], - 'preference': -1 if target == 'HDS' else -2, + 'preference': preference, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): From 96a53167fa64293506f446d0c2bf3e0db6c8df31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 00:32:31 +0600 Subject: [PATCH 057/327] [common] Generalize URLs' HTTP errors pre-testing --- youtube_dl/extractor/common.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 388c55e99..478232682 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -26,6 +27,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, + HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -716,6 +718,27 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def _check_formats(self, formats, video_id): + if formats: + formats[:] = filter( + lambda f: self._is_valid_url( + f['url'], video_id, + item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + formats) + + def _is_valid_url(self, url, video_id, item='video'): + try: + self._request_webpage( + HEADRequest(url), video_id, + 'Checking %s URL' % item) + return True + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self.report_warning( + '%s URL is invalid, skipping' % item, video_id) + return False + raise + def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ return ( From a57e8ce6580202c179c38a15abc31f84ca471521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 00:33:42 +0600 Subject: [PATCH 058/327] [lynda] Pre-test video URLs for HTTP errors (Closes #2185, closes #4782) --- youtube_dl/extractor/lynda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 26e84970d..762cefa34 100644 --- 
a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor): } for format_id, video_url in prioritized_streams['0'].items() ]) + self._check_formats(formats, video_id) self._sort_formats(formats) if self._downloader.params.get('listsubtitles', False): From d862a4f94fac46e5c6de790a9b66d78d463d666d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 00:34:31 +0600 Subject: [PATCH 059/327] [spiegel] Use generalized formats pre-testing --- youtube_dl/extractor/spiegel.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index f345883c7..b868241d5 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,14 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_HTTPError, -) -from ..utils import ( - HEADRequest, - ExtractorError, -) +from ..compat import compat_urlparse from .spiegeltv import SpiegeltvIE @@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor): if n.tag.startswith('type') and n.tag != 'type6': format_id = n.tag.rpartition('type')[2] video_url = base_url + n.find('./filename').text - # Test video URLs beforehand as some of them are invalid - try: - self._request_webpage( - HEADRequest(video_url), video_id, - 'Checking %s video URL' % format_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.report_warning( - '%s video URL is invalid, skipping' % format_id, video_id) - continue formats.append({ 'format_id': format_id, 'url': video_url, @@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor): }) duration = float(idoc[0].findall('./duration')[0].text) + self._check_formats(formats, video_id) self._sort_formats(formats) return { From 80a49d3d7bcd235ba15bd491cc62a0345c9abce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Jan 2015 02:08:29 +0600 Subject: [PATCH 060/327] Credit @David-Development for rtl2 (#4780) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 8362b6d8a..1596a7548 100644 --- a/AUTHORS +++ b/AUTHORS @@ -105,3 +105,4 @@ Dinesh S Johan K. Jensen Yen Chi Hsuan Enam Mijbah Noor +David Luhmer From 2b1bd292ae72fdea4ab731f93777a99b3729b31d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 25 Jan 2015 21:40:43 +0100 Subject: [PATCH 061/327] release 2015.01.25 --- README.md | 23 +++++++++++++++++++++-- youtube_dl/version.py | 2 +- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 36b87444e..0580ab2a3 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,14 @@ which means you can modify it, redistribute it or use it however you like. ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) --playlist-end NUMBER playlist video to end at (default is last) + --playlist-items ITEM_SPEC playlist video items to download. Specify + indices of the videos in the playlist + seperated by commas like: "--playlist-items + 1,2,5,8" if you want to download videos + indexed 1, 2, 5, 8 in the playlist. You can + specify range: "--playlist-items + 1-3,7,10-13", it will download the videos + at index 1, 2, 3, 7, 10, 11, 12 and 13. 
--match-title REGEX download only matching titles (regex or caseless sub-string) --reject-title REGEX skip download for matching titles (regex or @@ -124,7 +132,8 @@ which means you can modify it, redistribute it or use it however you like. ## Download Options: -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M) - -R, --retries RETRIES number of retries (default is 10) + -R, --retries RETRIES number of retries (default is 10), or + "infinite". --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer do not automatically adjust the buffer @@ -132,6 +141,11 @@ which means you can modify it, redistribute it or use it however you like. automatically resized from an initial value of SIZE. --playlist-reverse Download playlist videos in reverse order + --xattr-set-filesize (experimental) set file xattribute + ytdl.filesize with expected filesize + --external-downloader COMMAND (experimental) Use the specified external + downloader. Currently supports + aria2c,curl,wget ## Filesystem Options: -a, --batch-file FILE file containing URLs to download ('-' for @@ -191,7 +205,6 @@ which means you can modify it, redistribute it or use it however you like. --write-info-json write video metadata to a .info.json file --write-annotations write video annotations to a .annotation file - --write-thumbnail write thumbnail image to disk --load-info FILE json file containing the video information (created with the "--write-json" option) --cookies FILE file to read cookies from and dump cookie @@ -206,6 +219,12 @@ which means you can modify it, redistribute it or use it however you like. --no-cache-dir Disable filesystem caching --rm-cache-dir Delete all filesystem cache files +## Thumbnail images: + --write-thumbnail write thumbnail image to disk + --write-all-thumbnails write all thumbnail image formats to disk + --list-thumbnails Simulate and list all available thumbnail + formats + ## Verbosity / Simulation Options: -q, --quiet activates quiet mode --no-warnings Ignore warnings diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 35f3e1b6b..0d59cddd7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.23.4' +__version__ = '2015.01.25' From e72c7e41238681338c92427f6731c74947645308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 26 Jan 2015 12:01:43 +0100 Subject: [PATCH 062/327] [YoutubeDL] Always set the '_filename' field in the info_dict (reported in #4053) It's also useful when you use the '--write-info-json' option. 
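For illustration, a minimal sketch of reading the field back from a metadata file written by --write-info-json, assuming that file mirrors the info_dict as the message above suggests (the file name below is a hypothetical placeholder):

    import json

    # Load the metadata file produced by --write-info-json (hypothetical path)
    with open('example.info.json') as f:
        info = json.load(f)

    # With this change '_filename' is set even when -j / --dump-single-json is
    # not used, so the final file name can be recovered from the JSON alone.
    print(info['_filename'])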
--- youtube_dl/YoutubeDL.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b772f87f1..ae62432c6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1130,7 +1130,7 @@ class YoutubeDL(object): self._num_downloads += 1 - filename = self.prepare_filename(info_dict) + info_dict['_filename'] = filename = self.prepare_filename(info_dict) # Forced printings if self.params.get('forcetitle', False): @@ -1155,10 +1155,7 @@ class YoutubeDL(object): if self.params.get('forceformat', False): self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): - info_dict['_filename'] = filename self.to_stdout(json.dumps(info_dict)) - if self.params.get('dump_single_json', False): - info_dict['_filename'] = filename # Do nothing else if in simulate mode if self.params.get('simulate', False): From 9f0df77ab13ca19bc074b501296bb6a494193cd1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 26 Jan 2015 14:36:22 +0100 Subject: [PATCH 063/327] [YoutubeDL] Allow format filtering by fps --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/options.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ae62432c6..b5dd77e3f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -820,7 +820,7 @@ class YoutubeDL(object): '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s*\[ - (?Pwidth|height|tbr|abr|vbr|filesize) + (?Pwidth|height|tbr|abr|vbr|filesize|fps) \s*(?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) \]$ diff --git a/youtube_dl/options.py b/youtube_dl/options.py index dbc6f5528..5e4a1ad9b 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -297,7 +297,7 @@ def parseOpts(overrideArguments=None): ' You can filter the video results by putting a condition in' ' brackets, as in -f "best[height=720]"' ' (or -f "[filesize>10M]"). ' - ' This works for filesize, height, width, tbr, abr, and vbr' + ' This works for filesize, height, width, tbr, abr, vbr, and fps' ' and the comparisons <, <=, >, >=, =, != .' ' Formats for which the value is not known are excluded unless you' ' put a question mark (?) after the operator.' From 6ca85be6f8697dc8cb0378854c2c6cdc154593d4 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 28 May 2014 18:19:23 +0200 Subject: [PATCH 064/327] Filter DRM protected media in f4m downloader --- youtube_dl/downloader/f4m.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index c68b2c303..29de7630d 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -230,6 +230,23 @@ class F4mFD(FileDownloader): A downloader for f4m manifests or AdobeHDS. 
""" + def _get_unencrypted_media(self, doc): + media=doc.findall(_add_ns('media')) + if not media: + self.report_error('No media found') + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if not 'id' in e.attrib: + self.report_error('Media is DRM protected') + media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and + 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + if not media: + self.report_error('Media is DRM protected') + return media + def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') @@ -248,7 +265,8 @@ class F4mFD(FileDownloader): ) doc = etree.fromstring(manifest) - formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] + formats = [(int(f.attrib.get('bitrate', -1)), f) + for f in self._get_unencrypted_media(doc)] if requested_bitrate is None: # get the best format formats = sorted(formats, key=lambda f: f[0]) From 2103d038b3dac6f489d13282dcd2a6f4b74975c3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 27 Jan 2015 15:38:54 +0200 Subject: [PATCH 065/327] [lnkgo] Adapt to website changes --- youtube_dl/extractor/lnkgo.py | 61 ++++++++++++++--------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py index a8e357859..fd23b0b43 100644 --- a/youtube_dl/extractor/lnkgo.py +++ b/youtube_dl/extractor/lnkgo.py @@ -6,13 +6,12 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, - js_to_json, unified_strdate, ) class LnkGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P[^/]+)/ziurek\-(?P[A-Za-z0-9\-]+)' + _VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P[^/]+)/ziurek-(?P[A-Za-z0-9-]+)' _TESTS = [{ 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162', 'info_dict': { @@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = self._match_id(url) webpage = self._download_webpage( url, display_id, 'Downloading player webpage') @@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor): r'data-ep="([^"]+)"', webpage, 'video ID') title = self._og_search_title(webpage) description = self._og_search_description(webpage) + upload_date = unified_strdate(self._search_regex( + r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?([^<]+)', webpage, 'upload date', fatal=False)) thumbnail_w = int_or_none( self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False)) @@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor): 'height': thumbnail_h, }) - upload_date = unified_strdate(self._search_regex( - r'class="meta-item\sair-time">.*?([^<]+)', webpage, 'upload date', fatal=False)) - duration = int_or_none(self._search_regex( - r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False)) + config = self._parse_json(self._search_regex( + r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id) - pg_rating = self._search_regex( - r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='') - age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0) + if config.get('pGeo'): + self.report_warning( + 'This content might not be available in your country due to copyright reasons') 
- sources_js = self._search_regex( - r'(?s)sources:\s(\[.*?\]),', webpage, 'sources') - sources = self._parse_json( - sources_js, video_id, transform_source=js_to_json) + formats = [{ + 'format_id': 'hls', + 'ext': 'mp4', + 'url': config['EpisodeVideoLink_HLS'], + }] - formats = [] - for source in sources: - if source.get('provider') == 'rtmp': - m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', source['file']) - if not m: - continue - formats.append({ - 'format_id': 'rtmp', - 'ext': 'flv', - 'url': m.group('url'), - 'play_path': m.group('play_path'), - 'page_url': url, - }) - elif source.get('file').endswith('.m3u8'): - formats.append({ - 'format_id': 'hls', - 'ext': source.get('type', 'mp4'), - 'url': source['file'], - }) + m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', config['EpisodeVideoLink']) + if m: + formats.append({ + 'format_id': 'rtmp', + 'ext': 'flv', + 'url': m.group('url'), + 'play_path': m.group('play_path'), + 'page_url': url, + }) self._sort_formats(formats) @@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor): 'title': title, 'formats': formats, 'thumbnails': [thumbnail], - 'duration': duration, + 'duration': int_or_none(config.get('VideoTime')), 'description': description, - 'age_limit': age_limit, + 'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0), 'upload_date': upload_date, } From fe7710cbccdade0b66ef48a7e2eedad71a0702cc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 27 Jan 2015 23:55:22 +0800 Subject: [PATCH 066/327] [xuite] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/xuite.py | 147 +++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 youtube_dl/extractor/xuite.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 03c56156a..09070daa4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -546,6 +546,7 @@ from .xminus import XMinusIE from .xnxx import XNXXIE from .xvideos import XVideosIE from .xtube import XTubeUserIE, XTubeIE +from .xuite import XuiteIE from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py new file mode 100644 index 000000000..943757c27 --- /dev/null +++ b/youtube_dl/extractor/xuite.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import base64 +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, + compat_urllib_parse_unquote, + compat_parse_qs +) +from ..utils import ( + ExtractorError, + parse_iso8601, + parse_duration +) + +# ref: http://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data +REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' + +CST_ZONE = +8 # China Standard Time + + +class XuiteIE(InfoExtractor): + _VALID_URL = r'http://vlog.xuite.net/play/(?P%s)(/.*)?' 
% REGEX_BASE64 + _TESTS = [{ + # Audio + 'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2', + 'md5': '63a42c705772aa53fd4c1a0027f86adf', + 'info_dict': { + 'id': 'RGkzc1ZULTM4NjA5MTQuZmx2', + 'ext': 'mp3', + 'upload_date': '20110902', + 'uploader_id': '15973816', + 'uploader': '阿能', + 'timestamp': 1314932940, + 'title': '孤單南半球-歐德陽' + } + }, { + # Audio with alternative form of url + 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', + 'md5': 'c91645f324de53d82ebb80930e1a73d2', + 'info_dict': { + 'id': 'S1dDUjdyLTMyOTc3NjcuZmx2', + 'ext': 'mp3', + 'upload_date': '20101226', + 'uploader_id': '10102699', + 'uploader': '蠍', + 'timestamp': 1293367080, + 'title': '孫燕姿-眼淚成詩', + } + }, { + # Video with only one format + 'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2', + 'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e', + 'info_dict': { + 'id': 'TkRZNjhULTM0NDE2MjkuZmx2', + 'ext': 'mp4', + 'upload_date': '20110306', + 'uploader_id': '10400126', + 'uploader': 'Valen', + 'timestamp': 1299383640, + 'title': '孫燕姿 - 眼淚成詩', + } + }, { + # Video with two formats + 'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==', + 'md5': '1166e0f461efe55b62e26a2d2a68e6de', + 'info_dict': { + 'id': 'bWo1N1pLLTIxMzAxMTcwLmZsdg==', + 'ext': 'mp4', + 'upload_date': '20150117', + 'uploader_id': '242127761', + 'uploader': '我只是想認真點', + 'timestamp': 1421481240, + 'title': '暗殺教室 02', + } + }] + + def _flv_config(self, media_id): + base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8') + flv_config_url = 'http://vlog.xuite.net/flash/player?media=' + base64_media_id + flv_config = self._download_xml(flv_config_url, 'flv config') + + prop_dict = {} + for prop in flv_config.findall('./property'): + prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8') + + if not prop.text: + continue # CDATA may be empty in flv config + + encoded_content = base64.b64decode(prop.text).decode('utf-8') + prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content) + + return prop_dict + + def _type_string(self, media_url): + query_string = compat_urlparse.urlparse(media_url).query + type_string = compat_parse_qs(query_string)['q'][0] + return type_string + + def _guess_ext(self, media_url): + type_string = self._type_string(media_url) + if type_string == 'mp3': + return 'mp3' + elif type_string == '360' or type_string == '720': + return 'mp4' + else: + raise ExtractorError('Unknown type string %s' % type_string) + + def _real_extract(self, url): + video_id = self._match_id(url) + + page = self._download_webpage(url, video_id) + media_id = self._html_search_regex(r'data-mediaid="(\d+)"', page, 'media id') + flv_config = self._flv_config(media_id) + + timestamp_local = parse_iso8601(flv_config['publish_datetime'], ' ') + timestamp_gmt = timestamp_local - CST_ZONE * 3600 + + ret_attrs = { + 'id': video_id, + 'title': flv_config['title'], + 'thumbnail': flv_config['thumb'], + 'uploader': flv_config['author_name'], + 'timestamp': timestamp_gmt, + 'uploader_id': flv_config['author_id'], + 'duration': parse_duration(flv_config['duration']), + 'categories': [flv_config['category']] + } + + if 'hq_src' in flv_config: + src = flv_config['src'] + src_hq = flv_config['hq_src'] + ret_attrs['formats'] = [{ + 'url': src, + 'ext': self._guess_ext(src), + 'format_id': self._type_string(src) + }, { + 'url': src_hq, + 'ext': self._guess_ext(src_hq), + 'format_id': self._type_string(src_hq) + }] + else: + ret_attrs['url'] = 
flv_config['src'] + ret_attrs['ext'] = self._guess_ext(flv_config['src']) + + return ret_attrs From 6348ad12a057cc1c454488ce7c37070cf39a8f06 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 28 Jan 2015 00:13:40 +0800 Subject: [PATCH 067/327] [xuite] Add height information for the two formats --- youtube_dl/extractor/xuite.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 943757c27..aef9a14a3 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -129,17 +129,17 @@ class XuiteIE(InfoExtractor): } if 'hq_src' in flv_config: - src = flv_config['src'] - src_hq = flv_config['hq_src'] - ret_attrs['formats'] = [{ - 'url': src, - 'ext': self._guess_ext(src), - 'format_id': self._type_string(src) - }, { - 'url': src_hq, - 'ext': self._guess_ext(src_hq), - 'format_id': self._type_string(src_hq) - }] + urls = [flv_config['src'], flv_config['hq_src']] + + ret_attrs['formats'] = [] + + for url in urls: + ret_attrs['formats'].append({ + 'url': url, + 'ext': self._guess_ext(url), + 'format_id': self._type_string(url), + 'height': int(self._type_string(url)) + }) else: ret_attrs['url'] = flv_config['src'] ret_attrs['ext'] = self._guess_ext(flv_config['src']) From 3a0d2f520a0f95c2f87b1c95049135b10206f97f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Jan 2015 22:38:28 +0600 Subject: [PATCH 068/327] [YoutubeDL] Temporary fix for subprocess encoding issues on python2 @ Windows (Closes #4787) For now filenames will be encoded with preferrefencoding before written to disk --- youtube_dl/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b5dd77e3f..7f054cdff 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -543,6 +543,11 @@ class YoutubeDL(object): outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict + # Temporary fix for #4787 + # 'Treat' all problem characters by passing filename through preferredencoding + # to workaround encoding issues with subprocess on python2 @ Windows + if sys.version_info < (3, 0) and sys.platform == 'win32': + filename = encodeFilename(filename, True).decode(preferredencoding()) return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') From a28383834b07787b22a837bd6028bf24bea7ec94 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 28 Jan 2015 01:30:14 +0800 Subject: [PATCH 069/327] [xuite] Update tests --- youtube_dl/extractor/xuite.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index aef9a14a3..8466d4bc5 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -33,7 +33,10 @@ class XuiteIE(InfoExtractor): 'uploader_id': '15973816', 'uploader': '阿能', 'timestamp': 1314932940, - 'title': '孤單南半球-歐德陽' + 'title': '孤單南半球-歐德陽', + 'thumbnail': 're:^https?://.*\.jpg$', + 'categories': ['個人短片'], + 'duration': 247.246 } }, { # Audio with alternative form of url @@ -47,6 +50,9 @@ class XuiteIE(InfoExtractor): 'uploader': '蠍', 'timestamp': 1293367080, 'title': '孫燕姿-眼淚成詩', + 'thumbnail': 're:^https?://.*\.jpg$', + 'categories': ['個人短片'], + 'duration': 223.19 } }, { # Video with only one format @@ -60,6 +66,9 @@ class XuiteIE(InfoExtractor): 
'uploader': 'Valen', 'timestamp': 1299383640, 'title': '孫燕姿 - 眼淚成詩', + 'thumbnail': 're:^https?://.*\.jpg$', + 'categories': ['影視娛樂'], + 'duration': 217.399 } }, { # Video with two formats @@ -73,6 +82,9 @@ class XuiteIE(InfoExtractor): 'uploader': '我只是想認真點', 'timestamp': 1421481240, 'title': '暗殺教室 02', + 'thumbnail': 're:^https?://.*\.jpg$', + 'categories': ['電玩動漫'], + 'duration': 1384.907 } }] From 796df3c631dcf28f7ebdf256e57fcfa3b6463abe Mon Sep 17 00:00:00 2001 From: Shaya G Date: Wed, 28 Jan 2015 00:08:19 -0500 Subject: [PATCH 070/327] fixed viddler support - needed a Referer header; also added a viddler generic extractor --- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++ youtube_dl/extractor/viddler.py | 36 +++++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ad16b8330..a937a1e09 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -498,6 +498,19 @@ class GenericIE(InfoExtractor): 'uploader': 'www.abc.net.au', 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', } + }, + # embedded viddler video + { + 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597', + 'info_dict': { + 'id': '4d03aad9', + 'ext': 'mp4', + 'uploader': 'deadspin', + 'title': 'WALL-TO-GORTAT', + 'timestamp': 1422285291, + 'upload_date': '20150126', + }, + 'add_ie': ['Viddler'], } ] @@ -860,6 +873,13 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for embedded Viddler player + mobj = (re.search(r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/embed/.+?)\1', webpage) or + re.search(r']+?value=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/player/.+?)\1', webpage)) + + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 0faa729c6..bacbad50a 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -5,11 +5,14 @@ from ..utils import ( float_or_none, int_or_none, ) +from ..compat import ( + compat_urllib_request +) class ViddlerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P[a-z0-9]+)' - _TEST = { + _TESTS = [{ "url": "http://www.viddler.com/v/43903784", 'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4', 'info_dict': { @@ -25,7 +28,30 @@ class ViddlerIE(InfoExtractor): 'view_count': int, 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'], } - } + }, { + "url": "http://www.viddler.com/v/4d03aad9/", + "file": "4d03aad9.mp4", + "md5": "faa71fbf70c0bee7ab93076fd007f4b0", + "info_dict": { + 'upload_date': '20150126', + 'uploader': 'deadspin', + 'id': '4d03aad9', + 'timestamp': 1422285291, + 'title': 'WALL-TO-GORTAT', + } + }, { + "url": "http://www.viddler.com/player/221ebbbd/0/", + "file": "221ebbbd.mp4", + "md5": "0defa2bd0ea613d14a6e9bd1db6be326", + "info_dict": { + 'upload_date': '20140929', + 'uploader': 'BCLETeens', + 'id': '221ebbbd', + 'timestamp': 1411997190, + 'title': 'LETeens-Grammar-snack-third-conditional', + 'description': ' ' + } + }] def _real_extract(self, 
url): video_id = self._match_id(url) @@ -33,7 +59,9 @@ class ViddlerIE(InfoExtractor): json_url = ( 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' % video_id) - data = self._download_json(json_url, video_id)['video'] + headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} + request = compat_urllib_request.Request(json_url, None, headers) + data = self._download_json(request, video_id)['video'] formats = [] for filed in data['files']: @@ -53,7 +81,7 @@ class ViddlerIE(InfoExtractor): if filed.get('cdn_url'): f = f.copy() - f['url'] = self._proto_relative_url(filed['cdn_url']) + f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:') f['format_id'] = filed['profile_id'] + '-cdn' f['source_preference'] = 1 formats.append(f) From 0865f397ae6c875ec4194093af5c8f53e75f2285 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Wed, 28 Jan 2015 08:21:04 +0100 Subject: [PATCH 071/327] added extractor for dctp.tv --- youtube_dl/downloader/rtmp.py | 3 +++ youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dctp.py | 41 ++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/dctp.py diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index e06ebe826..6cb1bfc50 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -106,6 +106,7 @@ class RtmpFD(FileDownloader): protocol = info_dict.get('rtmp_protocol', None) no_resume = info_dict.get('no_resume', False) continue_dl = info_dict.get('continuedl', False) + real_time = info_dict.get('real_time', False) self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -143,6 +144,8 @@ class RtmpFD(FileDownloader): basic_args += ['--conn', conn] if protocol is not None: basic_args += ['--protocol', protocol] + if real_time: + basic_args += ['--realtime'] args = basic_args if not no_resume and continue_dl and not live: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 03c56156a..873ae69d3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -89,6 +89,7 @@ from .dailymotion import ( ) from .daum import DaumIE from .dbtv import DBTVIE +from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE from .dotsub import DotsubIE diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py new file mode 100644 index 000000000..5382255f0 --- /dev/null +++ b/youtube_dl/extractor/dctp.py @@ -0,0 +1,41 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +class DctpTvIE(InfoExtractor): + _VALID_URL = r'^http://www.dctp.tv/(#/)?filme/(?P.+?)/$' + + def _real_extract(self, url): + video_id = self._match_id(url) + base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/' + version_json = self._download_json(base_url + 'version.json', video_id) + version = version_json['version_name'] + info_json = self._download_json( + '{}{}/restapi/slugs/{}.json'.format(base_url, version, video_id), video_id) + object_id = info_json['object_id'] + meta_json = self._download_json( + '{}{}/restapi/media/{}.json'.format(base_url, version, object_id), video_id) + uuid = meta_json['uuid'] + title = meta_json['title'] + wide = meta_json['is_wide'] + if wide: + ratio = '16x9' + else: + ratio = '4x3' + play_path = 'mp4:{}_dctp_0500_{}.m4v'.format(uuid, ratio) + + servers_json = 
self._download_json('http://www.dctp.tv/streaming_servers/', video_id) + url = servers_json[0]['endpoint'] + + return { + 'id': video_id, + 'title': title, + 'format': 'rtmp', + 'url': url, + 'play_path': play_path, + 'real_time': True, + 'ext': 'flv' + } + + From 48a1e5141ad9f6b5c4ce8a355dcd7bf99c80e333 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Wed, 28 Jan 2015 08:59:58 +0100 Subject: [PATCH 072/327] added test for dctp --- youtube_dl/extractor/dctp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 5382255f0..9b687ef43 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -3,8 +3,16 @@ from __future__ import unicode_literals from .common import InfoExtractor + class DctpTvIE(InfoExtractor): _VALID_URL = r'^http://www.dctp.tv/(#/)?filme/(?P.+?)/$' + _TEST = { + 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', + 'info_dict': { + 'id': 'videoinstallation-fuer-eine-kaufhausfassade', + 'ext': 'flv', + 'title': 'Videoinstallation für eine Kaufhausfassade'} + } def _real_extract(self, url): video_id = self._match_id(url) @@ -38,4 +46,3 @@ class DctpTvIE(InfoExtractor): 'ext': 'flv' } - From b04fbd789c1efd5f918f81b5a5b9b6dafa806900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Jan 2015 22:49:42 +0600 Subject: [PATCH 073/327] [viddler] Modernize --- youtube_dl/extractor/viddler.py | 46 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index bacbad50a..9caee94e7 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -13,43 +13,43 @@ from ..compat import ( class ViddlerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P[a-z0-9]+)' _TESTS = [{ - "url": "http://www.viddler.com/v/43903784", + 'url': 'http://www.viddler.com/v/43903784', 'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4', 'info_dict': { 'id': '43903784', 'ext': 'mp4', - "title": "Video Made Easy", - 'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. 
', - "uploader": "viddler", + 'title': 'Video Made Easy', + 'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd', + 'uploader': 'viddler', 'timestamp': 1335371429, 'upload_date': '20120425', - "duration": 100.89, + 'duration': 100.89, 'thumbnail': 're:^https?://.*\.jpg$', 'view_count': int, 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'], } }, { - "url": "http://www.viddler.com/v/4d03aad9/", - "file": "4d03aad9.mp4", - "md5": "faa71fbf70c0bee7ab93076fd007f4b0", - "info_dict": { + 'url': 'http://www.viddler.com/v/4d03aad9/', + 'md5': 'faa71fbf70c0bee7ab93076fd007f4b0', + 'info_dict': { + 'id': '4d03aad9', + 'ext': 'mp4', + 'title': 'WALL-TO-GORTAT', 'upload_date': '20150126', 'uploader': 'deadspin', - 'id': '4d03aad9', 'timestamp': 1422285291, - 'title': 'WALL-TO-GORTAT', } }, { - "url": "http://www.viddler.com/player/221ebbbd/0/", - "file": "221ebbbd.mp4", - "md5": "0defa2bd0ea613d14a6e9bd1db6be326", - "info_dict": { + 'url': 'http://www.viddler.com/player/221ebbbd/0/', + 'md5': '0defa2bd0ea613d14a6e9bd1db6be326', + 'info_dict': { + 'id': '221ebbbd', + 'ext': 'mp4', + 'title': 'LETeens-Grammar-snack-third-conditional', + 'description': ' ', 'upload_date': '20140929', 'uploader': 'BCLETeens', - 'id': '221ebbbd', 'timestamp': 1411997190, - 'title': 'LETeens-Grammar-snack-third-conditional', - 'description': ' ' } }] @@ -68,7 +68,7 @@ class ViddlerIE(InfoExtractor): if filed.get('status', 'ready') != 'ready': continue f = { - 'format_id': filed['profile_id'], + 'format_id': filed['profile_id'] or filed['profile_name'], 'format_note': filed['profile_name'], 'url': self._proto_relative_url(filed['url']), 'width': int_or_none(filed.get('width')), @@ -82,15 +82,14 @@ class ViddlerIE(InfoExtractor): if filed.get('cdn_url'): f = f.copy() f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:') - f['format_id'] = filed['profile_id'] + '-cdn' + f['format_id'] = (filed['profile_id'] or filed['profile_name']) + '-cdn' f['source_preference'] = 1 formats.append(f) if filed.get('html5_video_source'): f = f.copy() - f['url'] = self._proto_relative_url( - filed['html5_video_source']) - f['format_id'] = filed['profile_id'] + '-html5' + f['url'] = self._proto_relative_url(filed['html5_video_source']) + f['format_id'] = (filed['profile_id'] or filed['profile_name']) + '-html5' f['source_preference'] = 0 formats.append(f) self._sort_formats(formats) @@ -99,7 +98,6 @@ class ViddlerIE(InfoExtractor): t.get('text') for t in data.get('tags', []) if 'text' in t] return { - '_type': 'video', 'id': video_id, 'title': data['title'], 'formats': formats, From e0d9f85aee92ebcb40ce688ed43172c718751e27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Jan 2015 22:56:06 +0600 Subject: [PATCH 074/327] Credit @HyShai for viddler fixes (#4794) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 1596a7548..20e620c35 100644 --- a/AUTHORS +++ b/AUTHORS @@ -106,3 +106,4 @@ Johan K. 
Jensen Yen Chi Hsuan Enam Mijbah Noor David Luhmer +Shaya Goldberg From cb454b333d91718a0c2b36c34c8b0d6858ff9505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Jan 2015 23:07:37 +0600 Subject: [PATCH 075/327] [generic] Improve some regexes --- youtube_dl/extractor/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a937a1e09..41884ed7a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -874,15 +874,15 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) # Look for embedded Viddler player - mobj = (re.search(r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/embed/.+?)\1', webpage) or - re.search(r']+?value=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/player/.+?)\1', webpage)) - + mobj = re.search( + r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1', + webpage) if mobj is not None: return self.url_result(mobj.group('url')) # Look for Ooyala videos - mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or - re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) + mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or + re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: return OoyalaIE._build_url_result(mobj.group('ec')) From 18b4e9e79d6cc1ed21df7479273915ca3a763b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Jan 2015 23:21:17 +0600 Subject: [PATCH 076/327] [viddler] Extract comment count --- youtube_dl/extractor/viddler.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 9caee94e7..ef104dc29 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -26,6 +26,7 @@ class ViddlerIE(InfoExtractor): 'duration': 100.89, 'thumbnail': 're:^https?://.*\.jpg$', 'view_count': int, + 'comment_count': int, 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'], } }, { @@ -38,6 +39,8 @@ class ViddlerIE(InfoExtractor): 'upload_date': '20150126', 'uploader': 'deadspin', 'timestamp': 1422285291, + 'view_count': int, + 'comment_count': int, } }, { 'url': 'http://www.viddler.com/player/221ebbbd/0/', @@ -50,6 +53,8 @@ class ViddlerIE(InfoExtractor): 'upload_date': '20140929', 'uploader': 'BCLETeens', 'timestamp': 1411997190, + 'view_count': int, + 'comment_count': int, } }] @@ -67,8 +72,9 @@ class ViddlerIE(InfoExtractor): for filed in data['files']: if filed.get('status', 'ready') != 'ready': continue + format_id = filed.get('profile_id') or filed['profile_name'] f = { - 'format_id': filed['profile_id'] or filed['profile_name'], + 'format_id': format_id, 'format_note': filed['profile_name'], 'url': self._proto_relative_url(filed['url']), 'width': int_or_none(filed.get('width')), @@ -82,14 +88,14 @@ class ViddlerIE(InfoExtractor): if filed.get('cdn_url'): f = f.copy() f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:') - f['format_id'] = (filed['profile_id'] or filed['profile_name']) + '-cdn' + f['format_id'] = format_id + '-cdn' f['source_preference'] = 1 formats.append(f) if filed.get('html5_video_source'): f = f.copy() f['url'] = 
self._proto_relative_url(filed['html5_video_source']) - f['format_id'] = (filed['profile_id'] or filed['profile_name']) + '-html5' + f['format_id'] = format_id + '-html5' f['source_preference'] = 0 formats.append(f) self._sort_formats(formats) @@ -107,5 +113,6 @@ class ViddlerIE(InfoExtractor): 'uploader': data.get('author'), 'duration': float_or_none(data.get('length')), 'view_count': int_or_none(data.get('view_count')), + 'comment_count': int_or_none(data.get('comment_count')), 'categories': categories, } From 63be3b89894f26caf69d9667f8e7db5ecbcb03ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Jan 2015 23:58:14 +0600 Subject: [PATCH 077/327] [ivi] Modernize --- youtube_dl/extractor/ivi.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 7a400323d..e82594444 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -16,7 +16,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _TESTS = [ # Single movie @@ -63,29 +63,34 @@ class IviIE(InfoExtractor): return int(m.group('commentcount')) if m is not None else 0 def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = self._match_id(url) api_url = 'http://api.digitalaccess.ru/api/json/' - data = {'method': 'da.content.get', - 'params': [video_id, {'site': 's183', - 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, - 'contentid': video_id - } - ] + data = { + 'method': 'da.content.get', + 'params': [ + video_id, { + 'site': 's183', + 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, + 'contentid': video_id } + ] + } request = compat_urllib_request.Request(api_url, json.dumps(data)) - video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON') + video_json_page = self._download_webpage( + request, video_id, 'Downloading video JSON') video_json = json.loads(video_json_page) if 'error' in video_json: error = video_json['error'] if error['origin'] == 'NoRedisValidData': raise ExtractorError('Video %s does not exist' % video_id, expected=True) - raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True) + raise ExtractorError( + 'Unable to download video %s: %s' % (video_id, error['message']), + expected=True) result = video_json['result'] From dcf53d440801505a27ee5615e3fb58b6a794bc73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Jan 2015 00:44:40 +0600 Subject: [PATCH 078/327] [YoutubeDL] Set format_id for video+audio (Closes #3634) --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 7f054cdff..4c238c555 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1074,6 +1074,7 @@ class YoutubeDL(object): selected_format = { 'requested_formats': formats_info, 'format': rf, + 'format_id': rf, 'ext': formats_info[0]['ext'], 'width': formats_info[0].get('width'), 'height': formats_info[0].get('height'), From 206dba27a4d134cf9a65b93f7a12bee033a80b18 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 29 Jan 2015 03:18:53 +0800 Subject: [PATCH 079/327] [NextMedia] Add new extractor --- 
youtube_dl/extractor/__init__.py | 6 ++ youtube_dl/extractor/nextmedia.py | 163 ++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 youtube_dl/extractor/nextmedia.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 03c56156a..444af8b6e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -284,6 +284,12 @@ from .netzkino import NetzkinoIE from .nerdcubed import NerdCubedFeedIE from .newgrounds import NewgroundsIE from .newstube import NewstubeIE +from .nextmedia import ( + NextMediaIE, + NextMediaActionNewsIE, + AppleDailyRealtimeNewsIE, + AppleDailyAnimationNewsIE +) from .nfb import NFBIE from .nfl import NFLIE from .nhl import NHLIE, NHLVideocenterIE diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py new file mode 100644 index 000000000..02dba4ef6 --- /dev/null +++ b/youtube_dl/extractor/nextmedia.py @@ -0,0 +1,163 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import parse_iso8601 + + +class NextMediaIE(InfoExtractor): + _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P\d+)/(?P\d+)' + _TESTS = [{ + 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', + 'md5': 'dff9fad7009311c421176d1ac90bfe4f', + 'info_dict': { + 'id': '53109199', + 'ext': 'mp4', + 'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:28222b9912b6665a21011b034c70fcc7', + 'timestamp': 1415456273, + 'upload_date': '20141108', + } + }] + + _URL_PATTERN = r'\{ url: \'(.+)\' \}' + + def _real_extract(self, url): + news_id = self._match_id(url) + page = self._download_webpage(url, news_id) + return self._extract_from_nextmedia_page(news_id, url, page) + + def _extract_from_nextmedia_page(self, news_id, url, page): + title = self._fetch_title(page) + video_url = self._search_regex(self._URL_PATTERN, page, 'video url') + + attrs = { + 'id': news_id, + 'title': title, + 'url': video_url, # ext can be inferred from url + 'thumbnail': self._fetch_thumbnail(page), + 'description': self._fetch_description(page), + } + + timestamp = self._fetch_timestamp(page) + if timestamp: + attrs['timestamp'] = timestamp + else: + attrs['upload_date'] = self._fetch_upload_date(url) + + return attrs + + def _fetch_title(self, page): + return self._og_search_title(page) + + def _fetch_thumbnail(self, page): + return self._og_search_thumbnail(page) + + def _fetch_timestamp(self, page): + dateCreated = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time') + return parse_iso8601(dateCreated) + + def _fetch_upload_date(self, url): + return self._search_regex(self._VALID_URL, url, 'upload date', group='date') + + def _fetch_description(self, page): + return self._og_search_property('description', page) + + +class NextMediaActionNewsIE(NextMediaIE): + _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P\d+)/(?P\d+)/\d+' + _TESTS = [{ + 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', + 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', + 'info_dict': { + 'id': '19009428', + 'ext': 'mp4', + 'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659', + 'timestamp': 1421791200, + 'upload_date': '20150120', + } + }] + + def _real_extract(self, url): + news_id = self._match_id(url) + actionnews_page = self._download_webpage(url, news_id) + article_url = 
self._og_search_url(actionnews_page) + article_page = self._download_webpage(article_url, news_id) + return self._extract_from_nextmedia_page(news_id, url, article_page) + + +class AppleDailyRealtimeNewsIE(NextMediaIE): + _VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _TESTS = [{ + 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', + 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', + 'info_dict': { + 'id': '36354694', + 'ext': 'mp4', + 'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:b23787119933404ce515c6356a8c355c', + 'upload_date': '20150128', + } + }, { + 'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A', + 'md5': '86b4e9132d158279c7883822d94ccc49', + 'info_dict': { + 'id': '550549', + 'ext': 'mp4', + 'title': '不滿被踩腳 山東兩大媽一路打下車', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d', + 'upload_date': '20150128', + } + }] + + _URL_PATTERN = r'\{url: \'(.+)\'\}' + + def _fetch_title(self, page): + return self._html_search_regex(r'

<h1 id="h1">([^<>]+)</h1>
', page, 'news title') + + def _fetch_thumbnail(self, page): + return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False) + + def _fetch_timestamp(self, page): + return None + + +class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE): + _VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _TESTS = [{ + 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671', + 'md5': '03df296d95dedc2d5886debbb80cb43f', + 'info_dict': { + 'id': '5003671', + 'ext': 'mp4', + 'title': '20正妹熱舞 《刀龍傳說Online》火辣上市', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd', + 'upload_date': '20150128', + } + }, { + # No thumbnail + 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/', + 'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb', + 'info_dict': { + 'id': '5003673', + 'ext': 'mp4', + 'title': '半夜尿尿 好像會看到___', + 'description': 'md5:61d2da7fe117fede148706cdb85ac066', + 'upload_date': '20150128', + }, + 'expected_warnings': [ + 'video thumbnail', + ] + }] + + def _fetch_title(self, page): + return self._html_search_meta('description', page, 'news title') + + def _fetch_description(self, page): + return self._html_search_meta('description', page, 'news description') From 367cc95aa76731aa951e949b325ac9c909af639d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 29 Jan 2015 03:49:56 +0800 Subject: [PATCH 080/327] [CtsNews] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ctsnews.py | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/ctsnews.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 03c56156a..72c5e4973 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -82,6 +82,7 @@ from .crunchyroll import ( CrunchyrollShowPlaylistIE ) from .cspan import CSpanIE +from .ctsnews import CtsNewsIE from .dailymotion import ( DailymotionIE, DailymotionPlaylistIE, diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py new file mode 100644 index 000000000..e1d8c814e --- /dev/null +++ b/youtube_dl/extractor/ctsnews.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import parse_iso8601, ExtractorError + + +class CtsNewsIE(InfoExtractor): + _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P\d+)\.html' + _TESTS = [{ + 'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html', + 'md5': '3aee7e0df7cdff94e43581f54c22619e', + 'info_dict': { + 'id': '201309031304098', + 'ext': 'mp4', + 'title': '韓國31歲童顏男 貌如十多歲小孩', + 'description': 'md5:f183feeba3752b683827aab71adad584', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1378205880, + 'upload_date': '20130903', + } + }] + + def _real_extract(self, url): + news_id = self._match_id(url) + page = self._download_webpage(url, news_id) + + if not self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', fatal=False): + raise ExtractorError('The news includes no videos!') + + feed_pattern = r'(http://news.cts.com.tw/action/mp4feed.php\?news_id=\d+)' + feed_url = self._html_search_regex(feed_pattern, page, 'feed url') + feed_page = self._download_webpage(feed_url, news_id) + + description = self._html_search_meta('description', page) + title = self._html_search_meta('title', page) + thumbnail = 
self._html_search_meta('image', page) + + datetime_pattern = r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})' + datetime_str = self._html_search_regex(datetime_pattern, page, 'date and time') + time = (datetime_str + ':00+08:00').replace('/', '-') + timestamp = parse_iso8601(time, delimiter=' ') + + return { + 'id': news_id, + 'title': title, + 'description': description, + 'url': feed_page, + 'thumbnail': thumbnail, + 'timestamp': timestamp + } From d20547610314e0821542fecd1872b5851bd8d5d7 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Thu, 29 Jan 2015 01:36:15 +0100 Subject: [PATCH 081/327] fix srmediathek description --- youtube_dl/extractor/srmediathek.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index 666a7dcc8..5d583c720 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -8,7 +8,7 @@ from ..utils import js_to_json class SRMediathekIE(InfoExtractor): - IE_DESC = 'Süddeutscher Rundfunk' + IE_DESC = 'Saarländischer Rundfunk' _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P[0-9]+)' _TEST = { From 4f264c02c73c8f7551bcdbc960c9635860003196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Jan 2015 13:37:17 +0100 Subject: [PATCH 082/327] [utils] YoutubeDLHTTPSHandler.https_open: pass all required arguments to do_open With this change the '--no-check-certificate' works again (#4807). --- youtube_dl/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b8c52af74..a4c9813ec 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -654,9 +654,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): self._params = params def https_open(self, req): + kwargs = {} + if hasattr(self, '_context'): # python > 2.6 + kwargs['context'] = self._context + if hasattr(self, '_check_hostname'): # python 3.x + kwargs['check_hostname'] = self._check_hostname return self.do_open(functools.partial( _create_http_connection, self, self._https_conn_class, True), - req) + req, **kwargs) def parse_iso8601(date_str, delimiter='T'): From affd04a45dcc46f21b55e4d9419812c33b9aa9ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Jan 2015 22:09:59 +0600 Subject: [PATCH 083/327] [xuite] Simplify and improve --- youtube_dl/extractor/xuite.py | 181 +++++++++++++++------------------- 1 file changed, 82 insertions(+), 99 deletions(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 8466d4bc5..a9dbf8c2d 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -2,158 +2,141 @@ from __future__ import unicode_literals import base64 + from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_urllib_parse_unquote, - compat_parse_qs -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, parse_iso8601, - parse_duration + parse_duration, ) -# ref: http://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data -REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' - -CST_ZONE = +8 # China Standard Time - class XuiteIE(InfoExtractor): - _VALID_URL = r'http://vlog.xuite.net/play/(?P%s)(/.*)?' % REGEX_BASE64 + _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' 
+ _VALID_URL = r'http://vlog.xuite.net/(?:play|embed)/(?P%s)' % _REGEX_BASE64 _TESTS = [{ # Audio 'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2', 'md5': '63a42c705772aa53fd4c1a0027f86adf', 'info_dict': { - 'id': 'RGkzc1ZULTM4NjA5MTQuZmx2', + 'id': '3860914', 'ext': 'mp3', - 'upload_date': '20110902', - 'uploader_id': '15973816', - 'uploader': '阿能', - 'timestamp': 1314932940, 'title': '孤單南半球-歐德陽', 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 247.246, + 'timestamp': 1314932940, + 'upload_date': '20110902', + 'uploader': '阿能', + 'uploader_id': '15973816', 'categories': ['個人短片'], - 'duration': 247.246 - } - }, { - # Audio with alternative form of url - 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', - 'md5': 'c91645f324de53d82ebb80930e1a73d2', - 'info_dict': { - 'id': 'S1dDUjdyLTMyOTc3NjcuZmx2', - 'ext': 'mp3', - 'upload_date': '20101226', - 'uploader_id': '10102699', - 'uploader': '蠍', - 'timestamp': 1293367080, - 'title': '孫燕姿-眼淚成詩', - 'thumbnail': 're:^https?://.*\.jpg$', - 'categories': ['個人短片'], - 'duration': 223.19 - } + }, }, { # Video with only one format 'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2', 'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e', 'info_dict': { - 'id': 'TkRZNjhULTM0NDE2MjkuZmx2', + 'id': '3441629', 'ext': 'mp4', - 'upload_date': '20110306', - 'uploader_id': '10400126', - 'uploader': 'Valen', - 'timestamp': 1299383640, 'title': '孫燕姿 - 眼淚成詩', 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 217.399, + 'timestamp': 1299383640, + 'upload_date': '20110306', + 'uploader': 'Valen', + 'uploader_id': '10400126', 'categories': ['影視娛樂'], - 'duration': 217.399 - } + }, }, { # Video with two formats 'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==', 'md5': '1166e0f461efe55b62e26a2d2a68e6de', 'info_dict': { - 'id': 'bWo1N1pLLTIxMzAxMTcwLmZsdg==', + 'id': '21301170', 'ext': 'mp4', - 'upload_date': '20150117', - 'uploader_id': '242127761', - 'uploader': '我只是想認真點', - 'timestamp': 1421481240, 'title': '暗殺教室 02', + 'description': '字幕:【極影字幕社】', 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 1384.907, + 'timestamp': 1421481240, + 'upload_date': '20150117', + 'uploader': '我只是想認真點', + 'uploader_id': '242127761', 'categories': ['電玩動漫'], - 'duration': 1384.907 - } + }, + }, { + 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', + 'only_matching': True, }] - def _flv_config(self, media_id): + def _extract_flv_config(self, media_id): base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8') - flv_config_url = 'http://vlog.xuite.net/flash/player?media=' + base64_media_id - flv_config = self._download_xml(flv_config_url, 'flv config') - + flv_config = self._download_xml( + 'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id, + 'flv config') prop_dict = {} for prop in flv_config.findall('./property'): prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8') - + # CDATA may be empty in flv config if not prop.text: - continue # CDATA may be empty in flv config - + continue encoded_content = base64.b64decode(prop.text).decode('utf-8') prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content) - return prop_dict - def _type_string(self, media_url): - query_string = compat_urlparse.urlparse(media_url).query - type_string = compat_parse_qs(query_string)['q'][0] - return type_string - - def _guess_ext(self, media_url): - type_string = 
self._type_string(media_url) - if type_string == 'mp3': - return 'mp3' - elif type_string == '360' or type_string == '720': - return 'mp4' - else: - raise ExtractorError('Unknown type string %s' % type_string) - def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - media_id = self._html_search_regex(r'data-mediaid="(\d+)"', page, 'media id') - flv_config = self._flv_config(media_id) + webpage = self._download_webpage(url, video_id) - timestamp_local = parse_iso8601(flv_config['publish_datetime'], ' ') - timestamp_gmt = timestamp_local - CST_ZONE * 3600 + error_msg = self._search_regex( + r'
([^<]+)', + webpage, 'error message', default=None) + if error_msg: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_msg), + expected=True) - ret_attrs = { - 'id': video_id, - 'title': flv_config['title'], - 'thumbnail': flv_config['thumb'], - 'uploader': flv_config['author_name'], - 'timestamp': timestamp_gmt, - 'uploader_id': flv_config['author_id'], - 'duration': parse_duration(flv_config['duration']), - 'categories': [flv_config['category']] + video_id = self._html_search_regex( + r'data-mediaid="(\d+)"', webpage, 'media id') + flv_config = self._extract_flv_config(video_id) + + FORMATS = { + 'audio': 'mp3', + 'video': 'mp4', } - if 'hq_src' in flv_config: - urls = [flv_config['src'], flv_config['hq_src']] + formats = [] + for format_tag in ('src', 'hq_src'): + video_url = flv_config.get(format_tag) + if not video_url: + continue + format_id = self._search_regex( + r'\bq=(.+?)\b', video_url, 'format id', default=format_tag) + formats.append({ + 'url': video_url, + 'ext': FORMATS.get(flv_config['type'], 'mp4'), + 'format_id': format_id, + 'height': int(format_id) if format_id.isnumeric() else None, + }) + self._sort_formats(formats) - ret_attrs['formats'] = [] + timestamp = flv_config.get('publish_datetime') + if timestamp: + timestamp = parse_iso8601(timestamp + ' +0800', ' ') - for url in urls: - ret_attrs['formats'].append({ - 'url': url, - 'ext': self._guess_ext(url), - 'format_id': self._type_string(url), - 'height': int(self._type_string(url)) - }) - else: - ret_attrs['url'] = flv_config['src'] - ret_attrs['ext'] = self._guess_ext(flv_config['src']) + category = flv_config.get('category') + categories = [category] if category else [] - return ret_attrs + return { + 'id': video_id, + 'title': flv_config['title'], + 'description': flv_config.get('description'), + 'thumbnail': flv_config.get('thumb'), + 'timestamp': timestamp, + 'uploader': flv_config.get('author_name'), + 'uploader_id': flv_config.get('author_id'), + 'duration': parse_duration(flv_config.get('duration')), + 'categories': categories, + 'formats': formats, + } From 219337990b8008eab81e1f9adf9eba20757f3cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Jan 2015 23:11:13 +0600 Subject: [PATCH 084/327] [xuite] Fix _VALID_URL --- youtube_dl/extractor/xuite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index a9dbf8c2d..4971965f9 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -14,7 +14,7 @@ from ..utils import ( class XuiteIE(InfoExtractor): _REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' - _VALID_URL = r'http://vlog.xuite.net/(?:play|embed)/(?P%s)' % _REGEX_BASE64 + _VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P%s)' % _REGEX_BASE64 _TESTS = [{ # Audio 'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2', From a7a14d958604f5334413e2fc1872a8317d5e4884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Jan 2015 20:15:38 +0100 Subject: [PATCH 085/327] [YoutubeDL] set the 'thumbnails' field if the info_dict has the 'thumbnails' field Since the '--write-thumbnail' uses the 'thumbnails' field and we didn't updated the info_dict, it wouldn't detect the thumbnail. 
(fixes #4812) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4c238c555..14e92ddcf 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -958,7 +958,7 @@ class YoutubeDL(object): if thumbnails is None: thumbnail = info_dict.get('thumbnail') if thumbnail: - thumbnails = [{'url': thumbnail}] + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( t.get('preference'), t.get('width'), t.get('height'), From e683a48d0e8650e1d9e8c323cacb468b104ea7bc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Jan 2015 05:38:05 +0800 Subject: [PATCH 086/327] [ctsnews] Detect youtube embedde videos --- youtube_dl/extractor/ctsnews.py | 75 +++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index e1d8c814e..35f3756f5 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -1,13 +1,27 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_request from ..utils import parse_iso8601, ExtractorError class CtsNewsIE(InfoExtractor): + # https connection failed (Connection reset) _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P\d+)\.html' _TESTS = [{ + 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', + 'md5': 'a9875cb790252b08431186d741beaabe', + 'info_dict': { + 'id': '201501291578109', + 'ext': 'mp4', + 'title': '以色列.真主黨交火 3人死亡', + 'description': 'md5:95e9b295c898b7ff294f09d450178d7d', + 'timestamp': 1422528540, + 'upload_date': '20150129', + } + }, { + # News count not appear on page but still available in database 'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html', 'md5': '3aee7e0df7cdff94e43581f54c22619e', 'info_dict': { @@ -19,33 +33,70 @@ class CtsNewsIE(InfoExtractor): 'timestamp': 1378205880, 'upload_date': '20130903', } + }, { + # With Youtube embedded video + 'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html', + 'md5': '1d842c771dc94c8c3bca5af2cc1db9c5', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': 'OVbfO7d0_hQ', + 'ext': 'mp4', + 'title': 'iPhone6熱銷 蘋果財報亮眼', + 'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150128', + 'uploader_id': 'TBSCTS', + 'uploader': '中華電視公司', + } }] def _real_extract(self, url): news_id = self._match_id(url) page = self._download_webpage(url, news_id) - if not self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', fatal=False): - raise ExtractorError('The news includes no videos!') + if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None): + feed_url = self._html_search_regex( + r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)', + page, 'feed url') + video_url = self._download_webpage(feed_url, news_id) + else: + self.to_screen('Not CTSPlayer video, trying Youtube...') + youtube_url = self._search_regex( + r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url', + default=None) + if not youtube_url: + raise ExtractorError('The news includes no videos!', expected=True) - feed_pattern = r'(http://news.cts.com.tw/action/mp4feed.php\?news_id=\d+)' - feed_url = self._html_search_regex(feed_pattern, page, 'feed url') - feed_page = 
self._download_webpage(feed_url, news_id) + return { + '_type': 'url', + 'url': youtube_url, + 'ie_key': 'Youtube', + } description = self._html_search_meta('description', page) title = self._html_search_meta('title', page) thumbnail = self._html_search_meta('image', page) - datetime_pattern = r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})' - datetime_str = self._html_search_regex(datetime_pattern, page, 'date and time') - time = (datetime_str + ':00+08:00').replace('/', '-') - timestamp = parse_iso8601(time, delimiter=' ') + datetime_str = self._html_search_regex( + r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time') + # Transform into ISO 8601 format with timezone info + datetime_str = datetime_str.replace('/', '-') + ':00+0800' + timestamp = parse_iso8601(datetime_str, delimiter=' ') + + # Note: the news count may decrease as time goes by + # It should be a bug in CTS website + req = compat_urllib_request.Request( + 'http://news.cts.com.tw/action/news_count.php?callback=cb&news_id=' + news_id) + req.add_header('Referer', url) + newscount_page = self._download_webpage(req, news_id) + news_count = self._search_regex(r'cb\((\d+)\)', newscount_page, 'news count') return { 'id': news_id, + 'url': video_url, 'title': title, 'description': description, - 'url': feed_page, 'thumbnail': thumbnail, - 'timestamp': timestamp + 'timestamp': timestamp, + 'view_count': news_count, } From f345fe9db77a900a603d753c73c5566d62c1cff9 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Thu, 29 Jan 2015 23:32:23 +0100 Subject: [PATCH 087/327] [dctp] fix python 2.6 compatibility --- youtube_dl/extractor/dctp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 9b687ef43..31bcd35c3 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -20,10 +20,10 @@ class DctpTvIE(InfoExtractor): version_json = self._download_json(base_url + 'version.json', video_id) version = version_json['version_name'] info_json = self._download_json( - '{}{}/restapi/slugs/{}.json'.format(base_url, version, video_id), video_id) + '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id), video_id) object_id = info_json['object_id'] meta_json = self._download_json( - '{}{}/restapi/media/{}.json'.format(base_url, version, object_id), video_id) + '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id), video_id) uuid = meta_json['uuid'] title = meta_json['title'] wide = meta_json['is_wide'] @@ -31,7 +31,7 @@ class DctpTvIE(InfoExtractor): ratio = '16x9' else: ratio = '4x3' - play_path = 'mp4:{}_dctp_0500_{}.m4v'.format(uuid, ratio) + play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio) servers_json = self._download_json('http://www.dctp.tv/streaming_servers/', video_id) url = servers_json[0]['endpoint'] From 87673cd4389f6798bb438bb46a306dc08c906cf5 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Thu, 29 Jan 2015 23:34:56 +0100 Subject: [PATCH 088/327] [dctp] follow id conventions --- youtube_dl/extractor/dctp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 31bcd35c3..752cff978 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -37,12 +37,13 @@ class DctpTvIE(InfoExtractor): url = servers_json[0]['endpoint'] return { - 'id': video_id, + 'id': object_id, 'title': title, 'format': 'rtmp', 'url': url, 'play_path': play_path, 'real_time': True, - 'ext': 'flv' + 'ext': 'flv', + 'display_id': 
video_id } From 75a4fc5b7233d4cb4376b205c585eb4d3d486241 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Fri, 30 Jan 2015 00:35:53 +0100 Subject: [PATCH 089/327] [dctp] fix test --- youtube_dl/extractor/dctp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 752cff978..c80d28865 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -9,7 +9,8 @@ class DctpTvIE(InfoExtractor): _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'info_dict': { - 'id': 'videoinstallation-fuer-eine-kaufhausfassade', + 'id': '1324', + 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', 'ext': 'flv', 'title': 'Videoinstallation für eine Kaufhausfassade'} } @@ -21,7 +22,7 @@ class DctpTvIE(InfoExtractor): version = version_json['version_name'] info_json = self._download_json( '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id), video_id) - object_id = info_json['object_id'] + object_id = str(info_json['object_id']) meta_json = self._download_json( '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id), video_id) uuid = meta_json['uuid'] From 7bb3ceb4c731c9783d2c565204cbc1071c3545ed Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Fri, 30 Jan 2015 00:47:43 +0100 Subject: [PATCH 090/327] [dctp] prefix real_time parameter with rtmp_ --- youtube_dl/downloader/rtmp.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 6cb1bfc50..f7eeb6f43 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -104,9 +104,9 @@ class RtmpFD(FileDownloader): live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn', None) protocol = info_dict.get('rtmp_protocol', None) + real_time = info_dict.get('rtmp_real_time', False) no_resume = info_dict.get('no_resume', False) continue_dl = info_dict.get('continuedl', False) - real_time = info_dict.get('real_time', False) self.report_destination(filename) tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index c80d28865..8a77f2b66 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -43,7 +43,7 @@ class DctpTvIE(InfoExtractor): 'format': 'rtmp', 'url': url, 'play_path': play_path, - 'real_time': True, + 'rtmp_real_time': True, 'ext': 'flv', 'display_id': video_id } From adc0ae3ceb9371eb23a9a4d81d0e134ac861723e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 30 Jan 2015 01:56:15 +0100 Subject: [PATCH 091/327] [__init__] Provide a better error messages if URLs are missing (Closes #4813) --- youtube_dl/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 71d2c6f35..e90679ff9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -361,7 +361,9 @@ def _real_main(argv=None): sys.exit() ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) - parser.error('you must provide at least one URL') + parser.error( + 'You must provide at least one URL.\n' + 'Type youtube-dl --help to see a list of all options.') try: if opts.load_info_filename is not None: From 4fe8495a23271e164a1ce618682d50f927adc075 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 30 Jan 2015 02:13:37 +0100 Subject: [PATCH 092/327] [viddler] PEP8 --- 
youtube_dl/extractor/viddler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index ef104dc29..8516a2940 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -6,7 +6,7 @@ from ..utils import ( int_or_none, ) from ..compat import ( - compat_urllib_request + compat_urllib_request ) From 83fda3c000a680317cfc9bb6fec899beb8bca773 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 30 Jan 2015 02:57:37 +0100 Subject: [PATCH 093/327] Add a test for --no-check-certificate --- test/test_http.py | 71 ++++++++++++++++++++++++++++++++++++++++++++ test/testcert.pem | 52 ++++++++++++++++++++++++++++++++ youtube_dl/compat.py | 6 ++++ 3 files changed, 129 insertions(+) create mode 100644 test/test_http.py create mode 100644 test/testcert.pem diff --git a/test/test_http.py b/test/test_http.py new file mode 100644 index 000000000..5cce5b3ae --- /dev/null +++ b/test/test_http.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl import YoutubeDL +from youtube_dl.compat import compat_http_server +import ssl +import threading + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + +class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/video.html': + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(b''], webpage, 'play count', fatal=False)) timestamp = parse_iso8601(self._search_regex( r'