From 163d966707a7e49bcdad4ebd9189922b58223395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Feb 2015 16:21:02 +0100 Subject: [PATCH 01/67] [downloader/external] curl: Add the '--location' flag curl doesn't follow redirections by default --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index ff031d2e0..51c41c704 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -75,7 +75,7 @@ class ExternalFD(FileDownloader): class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-o', tmpfilename] + cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') From 662435f7281da418612131512725fa5e367605be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Feb 2015 16:29:24 +0100 Subject: [PATCH 02/67] [YoutubeDL] Use a Request object for getting the cookies (fixes #4970) So that we don't have to implement all the methods used by the cookiejar. 
--- youtube_dl/YoutubeDL.py | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ea2435e0a..0a41152d6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -954,30 +954,9 @@ class YoutubeDL(object): return res def _calc_cookies(self, info_dict): - class _PseudoRequest(object): - def __init__(self, url): - self.url = url - self.headers = {} - self.unverifiable = False - - def add_unredirected_header(self, k, v): - self.headers[k] = v - - def get_full_url(self): - return self.url - - def is_unverifiable(self): - return self.unverifiable - - def has_header(self, h): - return h in self.headers - - def get_header(self, h, default=None): - return self.headers.get(h, default) - - pr = _PseudoRequest(info_dict['url']) + pr = compat_urllib_request.Request(info_dict['url']) self.cookiejar.add_cookie_header(pr) - return pr.headers.get('Cookie') + return pr.get_header('Cookie') def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' From fbc503d69698fca33525707b859b41d491d681ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Feb 2015 16:40:42 +0100 Subject: [PATCH 03/67] [downloader/hls] Fix detection of ffmpeg/avconv (reported in #4966) --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8be4f4249..3650d715e 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,7 +23,7 @@ class HlsFD(FileDownloader): tmpfilename = self.temp_name(filename) ffpp = FFmpegPostProcessor(downloader=self) - if not ffpp.available: + if not ffpp.available(): self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. 
Please install one.') return False ffpp.check_version() From f740fae2a418ce6c1d229890b48730d257a80dee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 17:26:41 +0100 Subject: [PATCH 04/67] [ffmpeg] Make available a property --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ea2435e0a..a74e73b0c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1301,7 +1301,7 @@ class YoutubeDL(object): downloaded = [] success = True merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) - if not merger.available(): + if not merger.available: postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 16babf6a5..e98fe9834 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -34,7 +34,7 @@ class FFmpegPostProcessor(PostProcessor): self._determine_executables() def check_version(self): - if not self.available(): + if not self.available: raise FFmpegPostProcessorError('ffmpeg or avconv not found. 
Please install one.') required_version = '10-0' if self._uses_avconv() else '1.0' @@ -108,6 +108,7 @@ class FFmpegPostProcessor(PostProcessor): self.probe_basename = p break + @property def available(self): return self.basename is not None From 65bf37ef83b67bd0e2c594283852d120734b2a3a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 17:27:29 +0100 Subject: [PATCH 05/67] [ffmpeg] Remove trivial helper method --- youtube_dl/postprocessor/ffmpeg.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index e98fe9834..3f2e6cf1d 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -37,7 +37,7 @@ class FFmpegPostProcessor(PostProcessor): if not self.available: raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') - required_version = '10-0' if self._uses_avconv() else '1.0' + required_version = '10-0' if self.basename == 'avconv' else '1.0' if is_outdated_version( self._versions[self.basename], required_version): warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( @@ -112,9 +112,6 @@ class FFmpegPostProcessor(PostProcessor): def available(self): return self.basename is not None - def _uses_avconv(self): - return self.basename == 'avconv' - @property def executable(self): return self._paths[self.basename] From 46312e0b46147ff26008eee62b2f9ba47b30fe73 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 17:29:32 +0100 Subject: [PATCH 06/67] release 2015.02.17 --- README.md | 2 ++ youtube_dl/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d68896202..acd0ef41b 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like. 
--playlist-reverse Download playlist videos in reverse order --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize + --hls-prefer-native (experimental) Use the native HLS + downloader instead of ffmpeg. --external-downloader COMMAND (experimental) Use the specified external downloader. Currently supports aria2c,curl,wget diff --git a/youtube_dl/version.py b/youtube_dl/version.py index bbe0e53b5..88834e058 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.16.1' +__version__ = '2015.02.17' From 8ac27a68e618ecc789bbef4dd0eb408324498fdd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 17:35:03 +0100 Subject: [PATCH 07/67] [hls] Switch to available as a property --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 3650d715e..8be4f4249 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,7 +23,7 @@ class HlsFD(FileDownloader): tmpfilename = self.temp_name(filename) ffpp = FFmpegPostProcessor(downloader=self) - if not ffpp.available(): + if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. 
Please install one.') return False ffpp.check_version() From 7010577720bbea309566c38adbd912c15400b9fc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 17:35:08 +0100 Subject: [PATCH 08/67] release 2015.02.17.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 88834e058..4ffbd3ae4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.17' +__version__ = '2015.02.17.1' From 4069766c527d10b8e25b9262a3882101367deb3e Mon Sep 17 00:00:00 2001 From: Sergey M? Date: Tue, 17 Feb 2015 22:31:35 +0600 Subject: [PATCH 09/67] [extractor/common] Test URLs with GET --- youtube_dl/extractor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e74b7bf25..4b0ee882c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -753,9 +753,7 @@ class InfoExtractor(object): def _is_valid_url(self, url, video_id, item='video'): try: - self._request_webpage( - HEADRequest(url), video_id, - 'Checking %s URL' % item) + self._request_webpage(url, video_id, 'Checking %s URL' % item) return True except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): From 0d93378887bd527b1df04e6138b4bc41382dd48f Mon Sep 17 00:00:00 2001 From: Sergey M? 
Date: Tue, 17 Feb 2015 22:34:29 +0600 Subject: [PATCH 10/67] [videolecturesnet] Check http format URLs (Closes #4968) --- youtube_dl/extractor/videolecturesnet.py | 33 +++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index ebd2a3dca..1ec5298e9 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -49,15 +49,30 @@ class VideoLecturesNetIE(InfoExtractor): thumbnail = ( None if thumbnail_el is None else thumbnail_el.attrib.get('src')) - formats = [{ - 'url': v.attrib['src'], - 'width': int_or_none(v.attrib.get('width')), - 'height': int_or_none(v.attrib.get('height')), - 'filesize': int_or_none(v.attrib.get('size')), - 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, - 'ext': v.attrib.get('ext'), - } for v in switch.findall('./video') - if v.attrib.get('proto') == 'http'] + formats = [] + for v in switch.findall('./video'): + proto = v.attrib.get('proto') + if not proto in ['http', 'rtmp']: + continue + f = { + 'width': int_or_none(v.attrib.get('width')), + 'height': int_or_none(v.attrib.get('height')), + 'filesize': int_or_none(v.attrib.get('size')), + 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, + 'ext': v.attrib.get('ext'), + } + src = v.attrib['src'] + if proto == 'http': + if self._is_valid_url(src, video_id): + f['url'] = src + formats.append(f) + elif proto == 'rtmp': + f.update({ + 'url': v.attrib['streamer'], + 'play_path': src, + }) + formats.append(f) + self._sort_formats(formats) return { 'id': video_id, From be24c8697f51fbc68e2cd99668e675a07a3735de Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 17:38:31 +0100 Subject: [PATCH 11/67] release 2015.02.17.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4ffbd3ae4..34a8d1184 100644 --- 
a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.17.1' +__version__ = '2015.02.17.2' From 4cd95bcbc34d74e506e6158251d50d83697da5fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Feb 2015 18:55:53 +0100 Subject: [PATCH 12/67] [twitch:stream] Prefer the 'source' format (fixes #4972) --- youtube_dl/extractor/common.py | 11 +++++++++++ youtube_dl/extractor/twitch.py | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4b0ee882c..31599bf52 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -839,6 +839,7 @@ class InfoExtractor(object): note='Downloading m3u8 information', errnote='Failed to download m3u8 information') last_info = None + last_media= None kv_rex = re.compile( r'(?P[a-zA-Z_-]+)=(?P"[^"]+"|[^",]+)(?:,|$)') for line in m3u8_doc.splitlines(): @@ -849,6 +850,13 @@ class InfoExtractor(object): if v.startswith('"'): v = v[1:-1] last_info[m.group('key')] = v + elif line.startswith('#EXT-X-MEDIA:'): + last_media = {} + for m in kv_rex.finditer(line): + v = m.group('val') + if v.startswith('"'): + v = v[1:-1] + last_media[m.group('key')] = v elif line.startswith('#') or not line.strip(): continue else: @@ -877,6 +885,9 @@ class InfoExtractor(object): width_str, height_str = resolution.split('x') f['width'] = int(width_str) f['height'] = int(height_str) + if last_media is not None: + f['m3u8_media'] = last_media + last_media = None formats.append(f) last_info = {} self._sort_formats(formats) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 87290d002..8e296698e 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -348,6 +348,12 @@ class TwitchStreamIE(TwitchBaseIE): '%s/api/channel/hls/%s.m3u8?%s' % (self._USHER_BASE, channel_id, 
compat_urllib_parse.urlencode(query).encode('utf-8')), channel_id, 'mp4') + # prefer the 'source' stream, the others are limited to 30 fps + def _sort_source(f): + if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source': + return 1 + return 0 + formats = sorted(formats, key=_sort_source) view_count = stream.get('viewers') timestamp = parse_iso8601(stream.get('created_at')) From 4a8d4a53b107dde6e78f37fd63dee22b3d29a877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Feb 2015 01:16:49 +0600 Subject: [PATCH 13/67] [videolecturesnet] Fix rtmp stream glitches (Closes #4968) --- youtube_dl/extractor/videolecturesnet.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index 1ec5298e9..d46234856 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -70,6 +70,7 @@ class VideoLecturesNetIE(InfoExtractor): f.update({ 'url': v.attrib['streamer'], 'play_path': src, + 'rtmp_real_time': True, }) formats.append(f) self._sort_formats(formats) From 98f000409f072e544d6c3b07809022f703b8b23d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 19:54:04 +0100 Subject: [PATCH 14/67] [radio.de] Fix extraction --- youtube_dl/extractor/radiode.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py index f95bc9454..aa5f6f8ad 100644 --- a/youtube_dl/extractor/radiode.py +++ b/youtube_dl/extractor/radiode.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import json - from .common import InfoExtractor @@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor): _VALID_URL = r'https?://(?P.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' _TEST = { 'url': 'http://ndr2.radio.de/', - 'md5': '3b4cdd011bc59174596b6145cda474a4', 'info_dict': { 'id': 'ndr2', 'ext': 'mp3', 'title': 're:^NDR 2 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:591c49c702db1a33751625ebfb67f273', 'thumbnail': 're:^https?://.*\.png', + 'is_live': True, }, 'params': { 'skip_download': True, @@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor): def _real_extract(self, url): radio_id = self._match_id(url) - webpage = self._download_webpage(url, radio_id) + jscode = self._search_regex( + r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", + webpage, 'broadcast') - broadcast = json.loads(self._search_regex( - r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', - webpage, 'broadcast')) - + broadcast = self._parse_json(jscode, radio_id) title = self._live_title(broadcast['name']) description = broadcast.get('description') or broadcast.get('shortDescription') - thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') + thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') formats = [{ 'url': stream['streamUrl'], From 5cda4eda7253d766611363a880af46895c11ad17 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 21:37:48 +0100 Subject: [PATCH 15/67] [YoutubeDL] Use a progress hook for progress reporting Instead of every downloader calling two helper functions, let our progress report be an ordinary progress hook like everyone else's. Closes #4875. --- youtube_dl/YoutubeDL.py | 13 +++-- youtube_dl/downloader/common.py | 87 +++++++++++++++++++++------------ youtube_dl/downloader/f4m.py | 70 ++++++++++++++++---------- youtube_dl/downloader/http.py | 13 +++-- youtube_dl/downloader/rtmp.py | 18 +++---- 5 files changed, 128 insertions(+), 73 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3658332ec..b087d356f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -199,18 +199,25 @@ class YoutubeDL(object): postprocessor. 
progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * status: One of "downloading" and "finished". + * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. - If status is one of "downloading" or "finished", the + If status is one of "downloading", or "finished", the following properties may also be present: * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown - * tmpfilename: The filename we're currently writing to + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown + * fragment_index: The counter of the currently + downloaded video fragment. + * fragment_count: The number of fragments (= individual + files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. 
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 7bb3a948d..45e55b99c 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import os import re @@ -54,6 +54,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params + self.add_progress_hook(self.report_progress) @staticmethod def format_seconds(seconds): @@ -226,42 +227,64 @@ class FileDownloader(object): self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) self.to_console_title('youtube-dl ' + msg) - def report_progress(self, percent, data_len_str, speed, eta): - """Report download progress.""" - if self.params.get('noprogress', False): + def report_progress(self, s): + if s['status'] == 'finished': + if self.params.get('noprogress', False): + self.to_screen('[download] Download completed') + else: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' + else: + msg_template = '100%% of %(_total_bytes_str)s' + self._report_progress_status( + msg_template % s, is_last_line=True) + + if self.params.get('noprogress'): return - if eta is not None: - eta_str = self.format_eta(eta) - else: - eta_str = 'Unknown ETA' - if percent is not None: - percent_str = self.format_percent(percent) - else: - percent_str = 'Unknown %' - speed_str = self.format_speed(speed) - msg = ('%s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str)) - self._report_progress_status(msg) - - def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): - if self.params.get('noprogress', False): + if s['status'] != 'downloading': return - downloaded_str = format_bytes(downloaded_data_len) - speed_str = self.format_speed(speed) - elapsed_str 
= FileDownloader.format_seconds(elapsed) - msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) - self._report_progress_status(msg) - def report_finish(self, data_len_str, tot_time): - """Report download finished.""" - if self.params.get('noprogress', False): - self.to_screen('[download] Download completed') + if s.get('eta') is not None: + s['_eta_str'] = self.format_eta(s['eta']) else: - self._report_progress_status( - ('100%% of %s in %s' % - (data_len_str, self.format_seconds(tot_time))), - is_last_line=True) + s['_eta_str'] = 'Unknown ETA' + + if s.get('total_bytes') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) + elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) + else: + if s.get('downloaded_bytes') == 0: + s['_percent_str'] = self.format_percent(0) + else: + s['_percent_str'] = 'Unknown %' + + if s.get('speed') is not None: + s['_speed_str'] = self.format_speed(s['speed']) + else: + s['_speed_str'] = 'Unknown speed' + + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' + elif s.get('total_bytes_estimate') is not None: + s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) + msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' + else: + if s.get('downloaded_bytes') is not None: + s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) + if s.get('elapsed'): + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' + else: + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' + else: + msg_template = '%(_percent_str)s % at 
%(_speed_str)s ETA %(_eta_str)s' + + self._report_progress_status(msg_template % s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 0e7a1c200..5a1f8e680 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import base64 import io @@ -252,17 +252,6 @@ class F4mFD(FileDownloader): requested_bitrate = info_dict.get('tbr') self.to_screen('[download] Downloading f4m manifest') manifest = self.ydl.urlopen(man_url).read() - self.report_destination(filename) - http_dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': True, - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit', None), - 'test': self.params.get('test', False), - } - ) doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) @@ -298,39 +287,67 @@ class F4mFD(FileDownloader): # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + self.report_destination(filename) + http_dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), + 'test': self.params.get('test', False), + } + ) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + write_flv_header(dest_stream) write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook state = { + 'status': 'downloading', 'downloaded_bytes': 0, - 'frag_counter': 0, + 'frag_index': 0, + 'frag_count': total_frags, + 'filename': filename, + 'tmpfilename': tmpfilename, } start = time.time() - def frag_progress_hook(status): - frag_total_bytes = status.get('total_bytes', 0) - estimated_size = 
(state['downloaded_bytes'] + - (total_frags - state['frag_counter']) * frag_total_bytes) - if status['status'] == 'finished': + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + frag_total_bytes = s.get('total_bytes', 0) + if s['status'] == 'finished': state['downloaded_bytes'] += frag_total_bytes - state['frag_counter'] += 1 - progress = self.calc_percent(state['frag_counter'], total_frags) + state['frag_index'] += 1 + + estimated_size = ( + (state['downloaded_bytes'] + frag_total_bytes) + / (state['frag_index'] + 1) * total_frags) + time_now = time.time() + state['total_bytes_estimate'] = estimated_size + state['elapsed'] = time_now - start + + if s['status'] == 'finished': + progress = self.calc_percent(state['frag_index'], total_frags) byte_counter = state['downloaded_bytes'] else: - frag_downloaded_bytes = status['downloaded_bytes'] + frag_downloaded_bytes = s['downloaded_bytes'] byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes frag_progress = self.calc_percent(frag_downloaded_bytes, frag_total_bytes) - progress = self.calc_percent(state['frag_counter'], total_frags) + progress = self.calc_percent(state['frag_index'], total_frags) progress += frag_progress / float(total_frags) - eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) - self.report_progress(progress, format_bytes(estimated_size), - status.get('speed'), eta) + state['eta'] = self.calc_eta( + start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) + state['speed'] = s.get('speed') + self._hook_progress(state) + http_dl.add_progress_hook(frag_progress_hook) frags_filenames = [] @@ -354,8 +371,8 @@ class F4mFD(FileDownloader): frags_filenames.append(frag_filename) dest_stream.close() - self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) + elapsed = time.time() - start self.try_rename(tmpfilename, filename) for frag_file in frags_filenames: os.remove(frag_file) @@ -366,6 
+383,7 @@ class F4mFD(FileDownloader): 'total_bytes': fsize, 'filename': filename, 'status': 'finished', + 'elapsed': elapsed, }) return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 49170cf9d..d37522aea 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -200,16 +200,16 @@ class HttpFD(FileDownloader): else: percent = self.calc_percent(byte_counter, data_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent, data_len_str, speed, eta) self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': byte_counter, 'total_bytes': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, 'speed': speed, + 'elapsed': now - start, }) if is_test and byte_counter == data_len: @@ -221,7 +221,13 @@ class HttpFD(FileDownloader): return False if tmpfilename != '-': stream.close() - self.report_finish(data_len_str, (time.time() - start)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'status': 'error', + }) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) @@ -235,6 +241,7 @@ class HttpFD(FileDownloader): 'total_bytes': byte_counter, 'filename': filename, 'status': 'finished', + 'elapsed': time.time() - start, }) return True diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index f7eeb6f43..79836fe99 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -51,23 +51,23 @@ class RtmpFD(FileDownloader): if not resume_percent: resume_percent = percent resume_downloaded_data_len = downloaded_data_len - eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) - speed = self.calc_speed(start, time.time(), downloaded_data_len - 
resume_downloaded_data_len) + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) data_len = None if percent > 0: data_len = int(downloaded_data_len * 100 / percent) - data_len_str = '~' + format_bytes(data_len) - self.report_progress(percent, data_len_str, speed, eta) - cursor_in_new_line = False self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': downloaded_data_len, - 'total_bytes': data_len, + 'total_bytes_estimate': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False else: # no percent for live streams mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) @@ -75,15 +75,15 @@ class RtmpFD(FileDownloader): downloaded_data_len = int(float(mobj.group(1)) * 1024) time_now = time.time() speed = self.calc_speed(start, time_now, downloaded_data_len) - self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) - cursor_in_new_line = False self._hook_progress({ 'downloaded_bytes': downloaded_data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') From 16e7711e22648027739096560914a976b8eea786 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 21:42:31 +0100 Subject: [PATCH 16/67] [downloader/http] Remove gruesome import --- youtube_dl/downloader/http.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 49170cf9d..25032ad4e 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -1,10 +1,9 @@ from __future__ import 
unicode_literals -import os -import time - -from socket import error as SocketError import errno +import os +import socket +import time from .common import FileDownloader from ..compat import ( @@ -102,7 +101,7 @@ class HttpFD(FileDownloader): resume_len = 0 open_mode = 'wb' break - except SocketError as e: + except socket.error as e: if e.errno != errno.ECONNRESET: # Connection reset is no problem, just retry raise From a91a2c1a83fdd195e850d4ad9c298c01a145ebf0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 21:44:41 +0100 Subject: [PATCH 17/67] [downloader] Remove various unneeded assignments and imports --- youtube_dl/downloader/f4m.py | 3 --- youtube_dl/downloader/http.py | 5 +---- youtube_dl/downloader/rtmp.py | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 5a1f8e680..b40ebfa50 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -15,7 +15,6 @@ from ..compat import ( from ..utils import ( struct_pack, struct_unpack, - format_bytes, encodeFilename, sanitize_open, xpath_text, @@ -334,10 +333,8 @@ class F4mFD(FileDownloader): if s['status'] == 'finished': progress = self.calc_percent(state['frag_index'], total_frags) - byte_counter = state['downloaded_bytes'] else: frag_downloaded_bytes = s['downloaded_bytes'] - byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes frag_progress = self.calc_percent(frag_downloaded_bytes, frag_total_bytes) progress = self.calc_percent(state['frag_index'], total_frags) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index d37522aea..6dec528f2 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -15,7 +15,6 @@ from ..utils import ( ContentTooShortError, encodeFilename, sanitize_open, - format_bytes, ) @@ -137,7 +136,6 @@ class HttpFD(FileDownloader): self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s 
bytes). Aborting.' % (data_len, max_data_len)) return False - data_len_str = format_bytes(data_len) byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) start = time.time() @@ -196,9 +194,8 @@ class HttpFD(FileDownloader): # Progress message speed = self.calc_speed(start, now, byte_counter - resume_len) if data_len is None: - eta = percent = None + eta = None else: - percent = self.calc_percent(byte_counter, data_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) self._hook_progress({ diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 79836fe99..0a52c34c7 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -11,7 +11,6 @@ from ..compat import compat_str from ..utils import ( check_executable, encodeFilename, - format_bytes, get_exe_version, ) From fa156077737d5162795a221fe2995a276d31c6d3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 21:46:20 +0100 Subject: [PATCH 18/67] PEP8 fixes --- youtube_dl/extractor/common.py | 3 +-- youtube_dl/extractor/twitch.py | 1 + youtube_dl/extractor/videolecturesnet.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 31599bf52..08b8ad37c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -27,7 +27,6 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, - HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -839,7 +838,7 @@ class InfoExtractor(object): note='Downloading m3u8 information', errnote='Failed to download m3u8 information') last_info = None - last_media= None + last_media = None kv_rex = re.compile( r'(?P[a-zA-Z_-]+)=(?P"[^"]+"|[^",]+)(?:,|$)') for line in m3u8_doc.splitlines(): diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8e296698e..4b0d8988d 100644 --- 
a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -348,6 +348,7 @@ class TwitchStreamIE(TwitchBaseIE): '%s/api/channel/hls/%s.m3u8?%s' % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), channel_id, 'mp4') + # prefer the 'source' stream, the others are limited to 30 fps def _sort_source(f): if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source': diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index d46234856..d6a7eb203 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -52,7 +52,7 @@ class VideoLecturesNetIE(InfoExtractor): formats = [] for v in switch.findall('./video'): proto = v.attrib.get('proto') - if not proto in ['http', 'rtmp']: + if proto not in ['http', 'rtmp']: continue f = { 'width': int_or_none(v.attrib.get('width')), From 30965ac66a4e33b8e70d6c0cf5da1a746054d6cd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:27:57 +0100 Subject: [PATCH 19/67] [vimeo] Prevent infinite loops if video password verification fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're seeing this in the tests¹ right now, which do not terminate. 
¹ https://travis-ci.org/jaimeMF/youtube-dl/jobs/51135858 --- youtube_dl/extractor/vimeo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 303e81447..1959d5e36 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -18,6 +18,7 @@ from ..utils import ( InAdvancePagedList, int_or_none, RegexNotFoundError, + smuggle_url, std_headers, unsmuggle_url, urlencode_postdata, @@ -267,8 +268,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') if re.search(r']+?id="pw_form"', webpage) is not None: + if data and '_video_password_verified' in data: + raise ExtractorError('video password verification failed!') self._verify_video_password(url, video_id, webpage) - return self._real_extract(url) + return self._real_extract( + smuggle_url(url, {'_video_password_verified': 'verified'})) else: raise ExtractorError('Unable to extract info section', cause=e) From a3fa5da4967bf2699bfd982cf56a8f627a13bda4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:33:31 +0100 Subject: [PATCH 20/67] [vimeo] Amend playlist tests --- youtube_dl/extractor/vimeo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1959d5e36..e7284049d 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -405,6 +405,7 @@ class VimeoChannelIE(InfoExtractor): _TESTS = [{ 'url': 'http://vimeo.com/channels/tributes', 'info_dict': { + 'id': 'tributes', 'title': 'Vimeo Tributes', }, 'playlist_mincount': 25, @@ -483,6 +484,7 @@ class VimeoUserIE(VimeoChannelIE): 'url': 'http://vimeo.com/nkistudio/videos', 'info_dict': { 'title': 'Nki', + 'id': 'nkistudio', }, 'playlist_mincount': 66, }] @@ -500,6 +502,7 @@ class VimeoAlbumIE(VimeoChannelIE): _TESTS = [{ 'url': 
'http://vimeo.com/album/2632481', 'info_dict': { + 'id': '2632481', 'title': 'Staff Favorites: November 2013', }, 'playlist_mincount': 13, @@ -530,6 +533,7 @@ class VimeoGroupsIE(VimeoAlbumIE): _TESTS = [{ 'url': 'http://vimeo.com/groups/rolexawards', 'info_dict': { + 'id': 'rolexawards', 'title': 'Rolex Awards for Enterprise', }, 'playlist_mincount': 73, @@ -612,6 +616,7 @@ class VimeoLikesIE(InfoExtractor): 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, "info_dict": { + 'id': 'user755559_likes', "description": "See all the videos urza likes", "title": 'Videos urza likes', }, From 15ec669374727c9bdc3aad0df141fdf305cb80be Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:33:41 +0100 Subject: [PATCH 21/67] [vk] Amend playlist test --- youtube_dl/extractor/vk.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 81e02a624..7dea8c59d 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor): _TEMPLATE_URL = 'https://vk.com/videos' _TEST = { 'url': 'http://vk.com/videos205387401', + 'info_dict': { + 'id': '205387401', + }, 'playlist_mincount': 4, } From 1def5f359ef6e197e4cc5d610ed86e109e240080 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:34:45 +0100 Subject: [PATCH 22/67] [livestream] Correct playlist ID and add a test for it --- youtube_dl/extractor/livestream.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5247c6f58..3642089f7 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor): 'url': 'http://new.livestream.com/tedx/cityenglish', 'info_dict': { 'title': 'TEDCity2.0 (English)', + 'id': '2245590', }, 'playlist_mincount': 4, }, { @@ -148,7 +149,8 @@ class 
LivestreamIE(InfoExtractor): if is_relevant(video_data, video_id)] if video_id is None: # This is an event page: - return self.playlist_result(videos, info['id'], info['full_name']) + return self.playlist_result( + videos, '%s' % info['id'], info['full_name']) else: if not videos: raise ExtractorError('Cannot find video %s' % video_id) From 6f53c63df66d95ea684f84ef2f7f0e697acc1d20 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:37:48 +0100 Subject: [PATCH 23/67] [test/helper] Only output a newline for forgotten keys if keys are really missing --- test/helper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index 651ef99b9..fef2db284 100644 --- a/test/helper.py +++ b/test/helper.py @@ -163,7 +163,9 @@ def expect_info_dict(self, got_dict, expected_dict): info_dict_str += ''.join( ' %s: %s,\n' % (_repr(k), _repr(v)) for k, v in test_info_dict.items() if k not in missing_keys) - info_dict_str += '\n' + + if info_dict_str: + info_dict_str += '\n' info_dict_str += ''.join( ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) for k in missing_keys) From f21e915fb9ad53a62df65e5c75c984243aaf0aea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:38:42 +0100 Subject: [PATCH 24/67] [test/helper] Render info_dict with a final comma --- test/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index fef2db284..570c91440 100644 --- a/test/helper.py +++ b/test/helper.py @@ -170,7 +170,7 @@ def expect_info_dict(self, got_dict, expected_dict): ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) for k in missing_keys) write_string( - '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) + '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) self.assertFalse( missing_keys, 'Missing keys in test definition: %s' % ( From 4d278fde645a5ab70b046293a26c12bba146acb0 Mon Sep 17 00:00:00 2001 From: Philipp 
Hagemeister Date: Wed, 18 Feb 2015 00:38:55 +0100 Subject: [PATCH 25/67] [ign] Amend playlist test --- youtube_dl/extractor/ign.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 3db668cd0..3aade9e74 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -34,6 +34,9 @@ class IGNIE(InfoExtractor): }, { 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', + 'info_dict': { + 'id': '100-little-things-in-gta-5-that-will-blow-your-mind', + }, 'playlist': [ { 'info_dict': { From 6e99868e4c73468734f3552d430ba6375c4e0880 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:41:45 +0100 Subject: [PATCH 26/67] [buzzfeed] Fix playlist test case --- youtube_dl/extractor/buzzfeed.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py index a5d2af174..df503ecc0 100644 --- a/youtube_dl/extractor/buzzfeed.py +++ b/youtube_dl/extractor/buzzfeed.py @@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor): 'skip_download': True, # Got enough YouTube download tests }, 'info_dict': { + 'id': 'look-at-this-cute-dog-omg', 'description': 're:Munchkin the Teddy Bear is back ?!', 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', }, @@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20141124', 'uploader_id': 'CindysMunchkin', - 'description': 're:© 2014 Munchkin the Shih Tzu', - 'uploader': 'Munchkin the Shih Tzu', + 'description': 're:© 2014 Munchkin the', + 'uploader': 're:^Munchkin the', 'title': 're:Munchkin the Teddy Bear gets her exercise', }, }] From 72c1f8de06f11aea670039a17ca63146a1b85af2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Feb 2015 00:48:52 +0100 Subject: [PATCH 27/67] [bandcamp:album] Fix extractor results and associated test --- 
youtube_dl/extractor/bandcamp.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 490cc961a..869294967 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -109,7 +109,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -133,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor): ], 'info_dict': { 'title': 'Jazz Format Mixtape vol.1', + 'id': 'jazz-format-mixtape-vol-1', + 'uploader_id': 'blazo', }, 'params': { 'playlistend': 2 }, - 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + 'skip': 'Bandcamp imposes download limits.' 
}, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { 'title': 'Hierophany of the Open Grave', + 'uploader_id': 'nightbringer', + 'id': 'hierophany-of-the-open-grave', }, 'playlist_mincount': 9, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'title': 'Loom', + 'id': 'dotscale', + 'uploader_id': 'dotscale', }, 'playlist_mincount': 7, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('subdomain') - title = mobj.group('title') - display_id = title or playlist_id - webpage = self._download_webpage(url, display_id) + uploader_id = mobj.group('subdomain') + album_id = mobj.group('album_id') + playlist_id = album_id or uploader_id + webpage = self._download_webpage(url, playlist_id) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) if not tracks_paths: raise ExtractorError('The page doesn\'t contain any tracks') @@ -168,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor): r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) return { '_type': 'playlist', + 'uploader_id': uploader_id, 'id': playlist_id, - 'display_id': display_id, 'title': title, 'entries': entries, } From 11e611a7fa809a447939e8a98c8549f2827c862f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 00:49:10 +0100 Subject: [PATCH 28/67] Extend various playlist tests --- youtube_dl/extractor/adultswim.py | 2 ++ youtube_dl/extractor/appletrailers.py | 3 +++ youtube_dl/extractor/dailymotion.py | 1 + youtube_dl/extractor/generic.py | 1 + 4 files changed, 7 insertions(+) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 502a9c25a..34b8b0115 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor): }, ], 'info_dict': { + 'id': 'rQxZvXQ4ROaSOqq-or2Mow', 'title': 'Rick and Morty - Pilot', 'description': "Rick moves in with his 
daughter's family and establishes himself as a bad influence on his grandson, Morty. " } @@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor): } ], 'info_dict': { + 'id': '-t8CamQlQ2aYZ49ItZCFog', 'title': 'American Dad - Putting Francine Out of Business', 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' }, diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 287f71e07..43e82847f 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -14,6 +14,9 @@ class AppleTrailersIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _TEST = { "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + 'info_dict': { + 'id': 'manofsteel', + }, "playlist": [ { "md5": "d97a8e575432dbcb81b7c3acb741f8a8", diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index cf5841a7c..b2dbf4a92 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', 'info_dict': { 'title': 'SPORT', + 'id': 'xv4bw_nqtv_sport', }, 'playlist_mincount': 20, }] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c4ba25a96..36a1f65bf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -473,6 +473,7 @@ class GenericIE(InfoExtractor): { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'info_dict': { + 'id': '1986', 'title': 'Unity 8 desktop-mode windows on Mir! 
- Ubuntu Discourse', }, 'playlist_mincount': 2, From 9cad27008b88b5ba3dfb1d747d7476b05582f385 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 00:49:34 +0100 Subject: [PATCH 29/67] release 2015.02.18 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 34a8d1184..635b6bdb8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.17.2' +__version__ = '2015.02.18' From 6d1a55a521b524dd0c562e3855f81434afb6f807 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 10:39:14 +0100 Subject: [PATCH 30/67] [youtube] Show entire player URL when -v is given --- youtube_dl/extractor/youtube.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 35ef4c303..2c02da2b7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -541,16 +541,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) + download_note = ( + 'Downloading player %s' % player_url + if self._downloader.params.get('verbose') else + 'Downloading %s player %s' % (player_type, player_id) + ) if player_type == 'js': code = self._download_webpage( player_url, video_id, - note='Downloading %s player %s' % (player_type, player_id), + note=download_note, errnote='Download of %s failed' % player_url) res = self._parse_sig_js(code) elif player_type == 'swf': urlh = self._request_webpage( player_url, video_id, - note='Downloading %s player %s' % (player_type, player_id), + note=download_note, errnote='Download of %s failed' % player_url) code = urlh.read() res = self._parse_sig_swf(code) From 785521bf4fbd99b2916bdab5d847d84424196c1d Mon Sep 17 00:00:00 
2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 10:42:23 +0100 Subject: [PATCH 31/67] [youtube] Remove useless if --- youtube_dl/extractor/youtube.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2c02da2b7..3d3d43491 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -562,10 +562,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: assert False, 'Invalid player type %r' % player_type - if cache_spec is None: - test_string = ''.join(map(compat_chr, range(len(example_sig)))) - cache_res = res(test_string) - cache_spec = [ord(c) for c in cache_res] + test_string = ''.join(map(compat_chr, range(len(example_sig)))) + cache_res = res(test_string) + cache_spec = [ord(c) for c in cache_res] self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) return res From 3eff81fbf72939abf885afcca2f1155cc976999b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 10:47:40 +0100 Subject: [PATCH 32/67] [jsinterp] Disable comment support We need a proper lexer to be able to understand YouTube's code, which contains /* inside of strings. For now it's sufficient to just disable comment support altogether. Fixes #4976, fixes #4979, fixes #4980, fixes #4981, fixes #4982. Closes #4977. 
--- test/test_jsinterp.py | 11 +++++++++++ test/test_youtube_signature.py | 6 ++++++ youtube_dl/jsinterp.py | 5 +---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index b91b8c492..fc73e5dc2 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('f'), -11) def test_comments(self): + 'Skipping: Not yet fully implemented' + return jsi = JSInterpreter(''' function x() { var x = /* 1 + */ 2; @@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 52) + jsi = JSInterpreter(''' + function f() { + var x = "/*"; + var y = 1 /* comment */ + 2; + return y; + } + ''') + self.assertEqual(jsi.call_function('f'), 3) + def test_precedence(self): jsi = JSInterpreter(''' function x() { diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 09696e19a..060864434 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -64,6 +64,12 @@ _TESTS = [ 'js', '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', + 'js', + '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', + '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', ) ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 453e2732c..0e0c7d90d 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -30,13 +30,10 @@ class JSInterpreter(object): def __init__(self, code, objects=None): if objects is None: objects = {} - self.code = self._remove_comments(code) + self.code = code self._functions = {} self._objects = objects - def _remove_comments(self, code): - return 
re.sub(r'(?s)/\*.*?\*/', '', code) - def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise ExtractorError('Recursion limit reached') From b8b928d5cb5d21390690cc21c29a3d679605015f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 10:54:45 +0100 Subject: [PATCH 33/67] [README] Add an FAQ entry for the player change in anticipation of many more bug reports --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index acd0ef41b..a2c148311 100644 --- a/README.md +++ b/README.md @@ -515,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c ### ERROR: no fmt_url_map or conn information found in video info -youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. +YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. ### ERROR: unable to download video ### -youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. +YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. + +### ExtractorError: Could not find JS function u'OF' + +In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. 
### SyntaxError: Non-ASCII character ### From 81975f4693c15c1f97ad92e7fafb1463ed2aa4c1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 10:54:56 +0100 Subject: [PATCH 34/67] release 2015.02.18.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 635b6bdb8..1df05a6df 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.18' +__version__ = '2015.02.18.1' From d47c26e16803abc1d15a677d88bbee78f7680db6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 19:56:10 +0100 Subject: [PATCH 35/67] [brightcove] Correct keys in playlists --- youtube_dl/extractor/brightcove.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index ea0969d4d..0733bece7 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor): 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', 'info_dict': { 'title': 'Sealife', + 'id': '3550319591001', }, 'playlist_mincount': 7, }, @@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor): playlist_info = json_data['videoList'] videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] - return self.playlist_result(videos, playlist_id=playlist_info['id'], + return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): From d8443cd3f7e50fafb918cff5e1676b8d6fef1a64 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 
19:56:24 +0100 Subject: [PATCH 36/67] [wsj] Correct test case --- youtube_dl/extractor/wsj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index cbe3dc7be..2ddf29a69 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -18,8 +18,8 @@ class WSJIE(InfoExtractor): 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', 'ext': 'mp4', 'upload_date': '20150202', - 'uploader_id': 'bbright', - 'creator': 'bbright', + 'uploader_id': 'jdesai', + 'creator': 'jdesai', 'categories': list, # a long list 'duration': 90, 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', From 9789d7535d6091f64dd688b0bb1f9869b6c6ca49 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 19:58:41 +0100 Subject: [PATCH 37/67] [xtube] Fix test case --- test/helper.py | 10 ++++++++++ youtube_dl/extractor/xtube.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index 570c91440..12afdf184 100644 --- a/test/helper.py +++ b/test/helper.py @@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict): self.assertTrue( got.startswith(start_str), 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) + elif isinstance(expected, compat_str) and expected.startswith('contains:'): + got = got_dict.get(info_field) + contains_str = expected[len('contains:'):] + self.assertTrue( + isinstance(got, compat_str), + 'Expected a %s object, but got %s for field %s' % ( + compat_str.__name__, type(got).__name__, info_field)) + self.assertTrue( + contains_str in got, + 'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) elif isinstance(expected, type): got = got_dict.get(info_field) self.assertTrue(isinstance(got, expected), diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index e8490b028..1644f53c8 100644 --- a/youtube_dl/extractor/xtube.py +++ 
b/youtube_dl/extractor/xtube.py @@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor): 'id': 'kVTUy_G222_', 'ext': 'mp4', 'title': 'strange erotica', - 'description': 'http://www.xtube.com an ET kind of thing', + 'description': 'contains:an ET kind of thing', 'uploader': 'greenshowers', 'duration': 450, 'age_limit': 18, From 85d586617750d38d742a24f141b099f6b898d269 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 20:03:00 +0100 Subject: [PATCH 38/67] [yahoo] Remove md5sum from test case The md5 sum has changed repeatedly, and we check whether it looks like a video anyways nowadays. --- youtube_dl/extractor/yahoo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index f8e7041a0..97dbac4cc 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -24,7 +24,6 @@ class YahooIE(InfoExtractor): _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - 'md5': '4962b075c08be8690a922ee026d05e69', 'info_dict': { 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', 'ext': 'mp4', From 93a16ba238c25392ed66aa0f1d7cbd3980c45f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 18 Feb 2015 22:00:12 +0100 Subject: [PATCH 39/67] [vimeo] Raise the ExtractorError with expected=True when no video password is given --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index e7284049d..78d287e0e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -175,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) if password is None: - raise ExtractorError('This video is protected by a 
password, use the --video-password option') + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') data = compat_urllib_parse.urlencode({ 'password': password, From d305dd73a3d6927f0a2c63d08662a183fa173833 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Feb 2015 23:59:50 +0100 Subject: [PATCH 40/67] [utils] Fix js_to_json Previously, the runtime could be atrocious for longer inputs. --- test/test_utils.py | 4 ++++ youtube_dl/utils.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 1c29d0889..c7373af1e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase): "playlist":[{"controls":{"all":null}}] }''') + inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' + json_code = js_to_json(inp) + self.assertEqual(json.loads(json_code), json.loads(inp)) + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3eb6bc6d4..4358137a0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,8 +1560,8 @@ def js_to_json(code): return '"%s"' % v res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\")?)*"| - '(?:[^'\\]*(?:\\\\|\\')?)*'| + "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) From b81a359eb67aea83799f1306a441fd6163bf1840 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 00:28:58 +0100 Subject: [PATCH 41/67] [YoutubeDL] Use render_table for format listing --- youtube_dl/YoutubeDL.py | 25 
+++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b087d356f..88809783b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1534,29 +1534,18 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - def line(format, idlen=20): - return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( - format['format_id'], - format['ext'], - self.format_resolution(format), - self._format_note(format), - )) - formats = info_dict.get('formats', [info_dict]) - idlen = max(len('format code'), - max(len(f['format_id']) for f in formats)) - formats_s = [ - line(f, idlen) for f in formats + table = [ + [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] + for f in formats if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: - formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' + table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - header_line = line({ - 'format_id': 'format code', 'ext': 'extension', - 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) + header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( - '[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + '[info] Available formats for %s:\n%s' % + (info_dict['id'], render_table(header_line, table))) def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') From c460bdd56b0420bb5175344c0f6523675fdae415 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 00:31:01 +0100 Subject: [PATCH 42/67] [sandia] Add new extractor (#4974) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sandia.py | 117 +++++++++++++++++++++++++++++++ youtube_dl/utils.py | 11 ++- 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/sandia.py diff 
--git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7d0909b5d..c712b907f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -386,6 +386,7 @@ from .rutube import ( RutubePersonIE, ) from .rutv import RUTVIE +from .sandia import SandiaIE from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE diff --git a/youtube_dl/extractor/sandia.py b/youtube_dl/extractor/sandia.py new file mode 100644 index 000000000..9c88167f0 --- /dev/null +++ b/youtube_dl/extractor/sandia.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import json +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( + int_or_none, + js_to_json, + mimetype2ext, + unified_strdate, +) + + +class SandiaIE(InfoExtractor): + IE_DESC = 'Sandia National Laboratories' + _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)' + _TEST = { + 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d', + 'md5': '9422edc9b9a60151727e4b6d8bef393d', + 'info_dict': { + 'id': '24aace4429fc450fb5b38cdbf424a66e1d', + 'ext': 'mp4', + 'title': 'Xyce Software Training - Section 1', + 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}', + 'upload_date': '20120904', + 'duration': 7794, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4') + webpage = self._download_webpage(req, video_id) + + js_path = self._search_regex( + r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"', + webpage, 'JS code URL') + js_url = compat_urlparse.urljoin(url, js_path) + + js_code = self._download_webpage( + js_url, video_id, note='Downloading player') + + def extract_str(key, **args): + return 
self._search_regex( + r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key), + js_code, key, **args) + + def extract_data(key, **args): + data_json = extract_str(key, **args) + if data_json is None: + return data_json + return self._parse_json( + data_json, video_id, transform_source=js_to_json) + + formats = [] + for i in itertools.count(): + fd = extract_data('VideoUrls[%d]' % i, default=None) + if fd is None: + break + formats.append({ + 'format_id': '%s' % i, + 'format_note': fd['MimeType'].partition('/')[2], + 'ext': mimetype2ext(fd['MimeType']), + 'url': fd['Location'], + 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None, + }) + self._sort_formats(formats) + + slide_baseurl = compat_urlparse.urljoin( + url, extract_data('SlideBaseUrl')) + slide_template = slide_baseurl + re.sub( + r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate')) + slides = [] + last_slide_time = 0 + for i in itertools.count(1): + sd = extract_str('Slides[%d]' % i, default=None) + if sd is None: + break + timestamp = int_or_none(self._search_regex( + r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),', + sd, 'slide %s timestamp' % i, fatal=False)) + slides.append({ + 'url': slide_template % i, + 'duration': timestamp - last_slide_time, + }) + last_slide_time = timestamp + formats.append({ + 'format_id': 'slides', + 'protocol': 'slideshow', + 'url': json.dumps(slides), + 'preference': -10000, # Downloader not yet written + }) + self._sort_formats(formats) + + title = extract_data('Title') + description = extract_data('Description', fatal=False) + duration = int_or_none(extract_data( + 'Duration', fatal=False), scale=1000) + upload_date = unified_strdate(extract_data('AirDate', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'upload_date': upload_date, + 'duration': duration, + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 
4358137a0..238b6556b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1616,6 +1616,15 @@ def args_to_str(args): return ' '.join(shlex_quote(a) for a in args) +def mimetype2ext(mt): + _, _, res = mt.rpartition('/') + + return { + 'x-ms-wmv': 'wmv', + 'x-mp4-fragmented': 'mp4', + }.get(res, res) + + def urlhandle_detect_ext(url_handle): try: url_handle.headers @@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle): if e: return e - return getheader('Content-Type').split("/")[1] + return mimetype2ext(getheader('Content-Type')) def age_restricted(content_limit, age_limit): From a025d3c5a529a3343d40886e9783fde0bf997b3f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 00:31:23 +0100 Subject: [PATCH 43/67] release 2015.02.19 --- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4bb68fdc5..e28d7d47c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -352,6 +352,7 @@ - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **RUTV**: RUTV.RU + - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1df05a6df..8e6ff49d0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.18.1' +__version__ = '2015.02.19' From 770700404365be7b16de89c75825f04e2cf15658 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 00:38:05 +0100 Subject: [PATCH 44/67] [patreon] Modernize --- youtube_dl/extractor/patreon.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 5429592a7..e5c9822c5 100644 --- a/youtube_dl/extractor/patreon.py +++ 
b/youtube_dl/extractor/patreon.py @@ -11,7 +11,7 @@ from ..utils import ( class PatreonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)' + _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' _TESTS = [ { 'url': 'http://www.patreon.com/creation?hid=743933', @@ -65,9 +65,7 @@ class PatreonIE(InfoExtractor): ''' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage).strip() @@ -80,11 +78,10 @@ class PatreonIE(InfoExtractor): uploader = self._html_search_regex( r'<strong>(.*?)</strong> is creating', webpage, 'uploader') else: - playlist_js = self._search_regex( + playlist = self._parse_json(self._search_regex( r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', - webpage, 'playlist JSON') - playlist_json = js_to_json(playlist_js) - playlist = json.loads(playlist_json) + webpage, 'playlist JSON'), + video_id, transform_source=js_to_json) data = playlist[0] video_url = self._proto_relative_url(data['mp3']) thumbnail = self._proto_relative_url(data.get('cover')) From 6b961a85fdef8219913b7b97cb33ef8d59d15535 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 01:04:19 +0100 Subject: [PATCH 45/67] [patreon] Add support for embedlies (fixes #4969) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/embedly.py | 16 ++++++++++++++++ youtube_dl/extractor/patreon.py | 26 +++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 youtube_dl/extractor/embedly.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c712b907f..84a7edffc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -121,6 +121,7 @@ from .ellentv import ( EllenTVClipsIE, ) from .elpais import ElPaisIE +from .embedly 
import EmbedlyIE from .empflix import EMPFlixIE from .engadget import EngadgetIE from .eporner import EpornerIE diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py new file mode 100644 index 000000000..1cdb11e34 --- /dev/null +++ b/youtube_dl/extractor/embedly.py @@ -0,0 +1,16 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class EmbedlyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' + _TESTS = [{ + 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index e5c9822c5..f179ea200 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -1,9 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( js_to_json, @@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor): 'thumbnail': 're:^https?://.*$', }, }, + { + 'url': 'https://www.patreon.com/creation?hid=1682498', + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'uploader': 'TraciJHines', + 'thumbnail': 're:^https?://.*$', + 'upload_date': '20150211', + 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', + 'uploader_id': 'TraciJHines', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + } + } ] # Currently 
Patreon exposes download URL via hidden CSS, so login is not @@ -72,11 +86,17 @@ class PatreonIE(InfoExtractor): attach_fn = self._html_search_regex( r'<div class="attach"><a target="_blank" href="([^"]+)">', webpage, 'attachment URL', default=None) + embed = self._html_search_regex( + r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', + webpage, 'embedded URL', default=None) + if attach_fn is not None: video_url = 'http://www.patreon.com' + attach_fn thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( r'<strong>(.*?)</strong> is creating', webpage, 'uploader') + elif embed is not None: + return self.url_result(embed) else: playlist = self._parse_json(self._search_regex( r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', From 7bd15b1a0378cff6960cd55e40819ef3967f747c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 01:04:24 +0100 Subject: [PATCH 46/67] release 2015.02.19.1 --- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e28d7d47c..1fcec086a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -121,6 +121,7 @@ - **EllenTV** - **EllenTV:clips** - **ElPais**: El País + - **Embedly** - **EMPFlix** - **Engadget** - **Eporner** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8e6ff49d0..db0ed70da 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.19' +__version__ = '2015.02.19.1' From 1c2528c8a3c599740558d5d752dfb368c8714a89 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 01:22:50 +0100 Subject: [PATCH 47/67] [cbs] Modernize --- youtube_dl/extractor/cbs.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cbs.py 
b/youtube_dl/extractor/cbs.py index e43756ec6..1ceb9d8d9 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -39,8 +37,7 @@ class CBSIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) real_id = self._search_regex( r"video\.settings\.pid\s*=\s*'([^']+)';", From 3bf5705316adb4e486ec76a9308198b499787947 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 01:43:20 +0100 Subject: [PATCH 48/67] [imgur] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/imgur.py | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/imgur.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 84a7edffc..1d1f07ff5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -205,6 +205,7 @@ from .imdb import ( ImdbIE, ImdbListIE ) +from .imgur import ImgurIE from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py new file mode 100644 index 000000000..16488e0c4 --- /dev/null +++ b/youtube_dl/extractor/imgur.py @@ -0,0 +1,84 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + mimetype2ext, +) + + +class ImgurIE(InfoExtractor): + _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)' + + _TESTS = [{ + 'url': 'https://i.imgur.com/A61SaA1.gifv', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'description': 'The Internet\'s visual 
storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + width = int_or_none(self._search_regex( + r'<param name="width" value="([0-9]+)"', + webpage, 'width', fatal=False)) + height = int_or_none(self._search_regex( + r'<param name="height" value="([0-9]+)"', + webpage, 'height', fatal=False)) + + formats = [] + video_elements = self._search_regex( + r'(?s)<div class="video-elements">(.*?)</div>', + webpage, 'video elements') + formats = [] + for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): + formats.append({ + 'format_id': m.group('type').partition('/')[2], + 'url': self._proto_relative_url(m.group('src')), + 'ext': mimetype2ext(m.group('type')), + 'acodec': 'none', + 'width': width, + 'height': height, + 'http_headers': { + 'User-Agent': 'youtube-dl (like wget)', + }, + }) + + gif_json = self._search_regex( + r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', + webpage, 'GIF code', fatal=False) + if gif_json: + gifd = self._parse_json( + gif_json, video_id, transform_source=js_to_json) + formats.append({ + 'format_id': 'gif', + 'preference': -10, + 'width': width, + 'height': height, + 'ext': 'gif', + 'acodec': 'none', + 'vcodec': 'gif', + 'container': 'gif', + 'url': self._proto_relative_url(gifd['gifUrl']), + 'filesize': gifd.get('size'), + 'http_headers': { + 'User-Agent': 'youtube-dl (like wget)', + }, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'title': self._og_search_title(webpage), + } From 1ac1af9b4702fa0b921ceb45f6e7b13b35e0a2de Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 01:43:28 +0100 Subject: [PATCH 49/67] release 2015.02.19.2 --- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 
2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1fcec086a..607e958f6 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -191,6 +191,7 @@ - **ign.com** - **imdb**: Internet Movie Database trailers - **imdb:list**: Internet Movie Database lists + - **Imgur** - **Ina** - **InfoQ** - **Instagram** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index db0ed70da..131dd571b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.19.1' +__version__ = '2015.02.19.2' From bd03ffc16e0e09c1ec6fb5c3f2c3083238af5349 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 01:58:52 +0100 Subject: [PATCH 50/67] [netzkino] Skip download in test case Works fine from Germany, but fails from everywhere else --- youtube_dl/extractor/netzkino.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py index 93567d1e3..bc17e20aa 100644 --- a/youtube_dl/extractor/netzkino.py +++ b/youtube_dl/extractor/netzkino.py @@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor): 'timestamp': 1344858571, 'age_limit': 12, }, + 'params': { + 'skip_download': 'Download only works from Germany', + } } def _real_extract(self, url): From f37e3f99f01df51af649a3544570b27facc5d92c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 02:00:50 +0100 Subject: [PATCH 51/67] [generic] Correct test case Video has been reuploaded / edited --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36a1f65bf..8dce96a64 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -532,7 +532,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'Mrj4DVp2zeA', 'ext': 
'mp4', - 'upload_date': '20150204', + 'upload_date': '20150212', 'uploader': 'The National Archives UK', 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', 'uploader_id': 'NationalArchives08', From 71d53ace2f5c4637f5d3390298f475bbeb2452f3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 02:04:28 +0100 Subject: [PATCH 52/67] [sockshare] Do not require thumbnail anymore Thumbnail is not present on the website anymore. --- youtube_dl/extractor/sockshare.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py index 7d3c0e937..b5fa6f1da 100644 --- a/youtube_dl/extractor/sockshare.py +++ b/youtube_dl/extractor/sockshare.py @@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor): 'id': '437BE28B89D799D7', 'title': 'big_buck_bunny_720p_surround.avi', 'ext': 'avi', - 'thumbnail': 're:^http://.*\.jpg$', } } @@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor): ''', webpage, 'hash') fields = { - "hash": confirm_hash, + "hash": confirm_hash.encode('utf-8'), "confirm": "Continue as Free User" } @@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor): webpage, 'title', default=None) thumbnail = self._html_search_regex( r'<img\s+src="([^"]*)".+?name="bg"', - webpage, 'thumbnail') + webpage, 'thumbnail', default=None) formats = [{ 'format_id': 'sd', From 3d54788495aae464a4ce63b5f093d4409f600dd8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 02:12:08 +0100 Subject: [PATCH 53/67] [webofstories] Fix extraction --- youtube_dl/extractor/webofstories.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py index 396cf4e83..73077a312 100644 --- a/youtube_dl/extractor/webofstories.py +++ b/youtube_dl/extractor/webofstories.py @@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor): description = 
self._html_search_meta('description', webpage) thumbnail = self._og_search_thumbnail(webpage) - story_filename = self._search_regex( - r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') - speaker_id = self._search_regex( - r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') - story_id = self._search_regex( - r'\.storyId\((\d+)\)', webpage, 'story ID') - speaker_type = self._search_regex( - r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') - great_life = self._search_regex( - r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + embed_params = [s.strip(" \r\n\t'") for s in self._search_regex( + r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', + webpage, 'embed params').split(',')] + + ( + _, speaker_id, story_id, story_duration, + speaker_type, great_life, _thumbnail, _has_subtitles, + story_filename, _story_order) = embed_params + is_great_life_series = great_life == 'true' - duration = int_or_none(self._search_regex( - r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + duration = int_or_none(story_duration) # URL building, see: http://www.webofstories.com/scripts/player.js ms_prefix = '' From 1a13940c8dada638f8298b6c1406f38d4a3bf270 Mon Sep 17 00:00:00 2001 From: John Boehr <jbboehr@gmail.com> Date: Wed, 18 Feb 2015 18:12:48 -0800 Subject: [PATCH 54/67] [imgur] support regular URL --- youtube_dl/extractor/imgur.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 16488e0c4..8449c45f4 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -7,11 +7,11 @@ from ..utils import ( int_or_none, js_to_json, mimetype2ext, + ExtractorError, ) - class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.)?(?:mp4|gifv)?' 
_TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -21,12 +21,25 @@ class ImgurIE(InfoExtractor): 'title': 'MRW gifv is up and running without any bugs', 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', }, + }, { + 'url': 'https://imgur.com/A61SaA1', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', + }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) + if not sources: + raise ExtractorError( + 'No sources found for video %s' % video_id, expected=True) + width = int_or_none(self._search_regex( r'<param name="width" value="([0-9]+)"', webpage, 'width', fatal=False)) From 383456aa29c9c47066d422d3fdfdce29da2eee07 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 04:09:52 +0100 Subject: [PATCH 55/67] [Makefile] Also delete *.avi files in clean --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0636fc4cb..573c82685 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump 
*.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin From e880c66bd800e4769ffdff41fb9dc88cf3206c12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 04:12:40 +0100 Subject: [PATCH 56/67] [theonion] Modernize --- youtube_dl/extractor/theonion.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py index b65d8e03f..10239c906 100644 --- a/youtube_dl/extractor/theonion.py +++ b/youtube_dl/extractor/theonion.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError class TheOnionIE(InfoExtractor): - _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' + _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?' _TEST = { 'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', 'md5': '19eaa9a39cf9b9804d982e654dc791ee', @@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - article_id = mobj.group('article_id') - - webpage = self._download_webpage(url, article_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) video_id = self._search_regex( r'"videoId":\s(\d+),', webpage, 'video ID') @@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) - if not sources: - raise ExtractorError( - 'No sources found for video %s' % video_id, expected=True) - formats = [] for src, type_ in sources: if type_ == 'video/mp4': @@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor): }) elif type_ == 'application/x-mpegURL': formats.extend( - self._extract_m3u8_formats(src, video_id, 
preference=-1)) + self._extract_m3u8_formats(src, display_id, preference=-1)) else: self.report_warning( 'Encountered unexpected format: %s' % type_) - self._sort_formats(formats) return { 'id': video_id, + 'display_id': display_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, From d236b37ac94cd36657c881e18b8d9187483afa80 Mon Sep 17 00:00:00 2001 From: John Boehr <jbboehr@gmail.com> Date: Wed, 18 Feb 2015 19:28:19 -0800 Subject: [PATCH 57/67] [imgur] improve regex #4998 --- youtube_dl/extractor/imgur.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 8449c45f4..38c961773 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -11,7 +11,7 @@ from ..utils import ( ) class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.)?(?:mp4|gifv)?' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', From 9e2d7dca87a15cf455fa6c4843a0241ba0b7ad77 Mon Sep 17 00:00:00 2001 From: John Boehr <jbboehr@gmail.com> Date: Wed, 18 Feb 2015 19:47:54 -0800 Subject: [PATCH 58/67] [imgur] improve error check for non-video URLs --- youtube_dl/extractor/imgur.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 38c961773..7937a5c81 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, + str_or_none, js_to_json, mimetype2ext, ExtractorError, @@ -35,11 +36,6 @@ class ImgurIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) - if not sources: - raise ExtractorError( - 'No sources found for video %s' % video_id, 
expected=True) - width = int_or_none(self._search_regex( r'<param name="width" value="([0-9]+)"', webpage, 'width', fatal=False)) @@ -47,10 +43,13 @@ class ImgurIE(InfoExtractor): r'<param name="height" value="([0-9]+)"', webpage, 'height', fatal=False)) - formats = [] - video_elements = self._search_regex( + video_elements = str_or_none(self._search_regex( r'(?s)<div class="video-elements">(.*?)</div>', - webpage, 'video elements') + webpage, 'video elements', fatal=False)) + if not video_elements: + raise ExtractorError( + 'No sources found for video %s' % video_id, expected=True) + formats = [] for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): formats.append({ From b88ba05356bdae07245dd5240b36415002e25d0b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 05:53:09 +0100 Subject: [PATCH 59/67] [imgur] Simplify --- youtube_dl/extractor/imgur.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 7937a5c81..b16c7aed0 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -5,12 +5,12 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, - str_or_none, js_to_json, mimetype2ext, ExtractorError, ) + class ImgurIE(InfoExtractor): _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' @@ -43,12 +43,13 @@ class ImgurIE(InfoExtractor): r'<param name="height" value="([0-9]+)"', webpage, 'height', fatal=False)) - video_elements = str_or_none(self._search_regex( + video_elements = self._search_regex( r'(?s)<div class="video-elements">(.*?)</div>', - webpage, 'video elements', fatal=False)) + webpage, 'video elements', default=None) if not video_elements: raise ExtractorError( - 'No sources found for video %s' % video_id, expected=True) + 'No sources found for video %s. Maybe an image?' 
% video_id, + expected=True) formats = [] for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): From a3b9157f499e0ba0ef9cfd44b70b0d43795f60f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 19 Feb 2015 13:06:53 +0100 Subject: [PATCH 60/67] [cbssports] Add extractor (closes #4996) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cbssports.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 youtube_dl/extractor/cbssports.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1d1f07ff5..c2424ed48 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -58,6 +58,7 @@ from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .cbs import CBSIE from .cbsnews import CBSNewsIE +from .cbssports import CBSSportsIE from .ccc import CCCIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py new file mode 100644 index 000000000..ae47e74cc --- /dev/null +++ b/youtube_dl/extractor/cbssports.py @@ -0,0 +1,30 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CBSSportsIE(InfoExtractor): + _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' + + _TEST = { + 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', + 'info_dict': { + 'id': '_d5_GbO8p1sT', + 'ext': 'flv', + 'title': 'US Open flashbacks: 1990s', + 'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + section = mobj.group('section') + video_id = mobj.group('id') + all_videos = self._download_json( + 
'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, + video_id) + # The json file contains the info of all the videos in the section + video_info = next(v for v in all_videos if v['pcid'] == video_id) + return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') From 5c8a3f862a325987a9ddbdc7f08ea62aad80278e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 19 Feb 2015 15:00:39 +0100 Subject: [PATCH 61/67] [nbc] Use a test video that works outside the US --- youtube_dl/extractor/nbc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 89a2845fe..3645d3033 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -18,13 +18,13 @@ class NBCIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', + 'url': 'http://www.nbc.com/the-tonight-show/segments/112966', # md5 checksum is not stable 'info_dict': { - 'id': 'bTmnLCvIbaaH', + 'id': 'c9xnCo0YPOPH', 'ext': 'flv', - 'title': 'I Am a Firefighter', - 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', + 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', + 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', }, }, { From d5c69f1da41f3e370c500482303196ae845472ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Feb 2015 21:47:11 +0600 Subject: [PATCH 62/67] [5min] Cover joystiq.com URLs (Closes #4962) --- youtube_dl/extractor/fivemin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 5b24b921c..157094e8c 100644 --- a/youtube_dl/extractor/fivemin.py +++ 
b/youtube_dl/extractor/fivemin.py @@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor): IE_NAME = '5min' _VALID_URL = r'''(?x) (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| + https?://(?:(?:massively|www)\.)?joystiq\.com/video/| 5min:) (?P<id>\d+) ''' From e66e1a0046ca804461c4c9c3e890165ec9d865e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Feb 2015 22:15:19 +0600 Subject: [PATCH 63/67] [pornhub] Add support for playlists (Closes #4995) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/pornhub.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c2424ed48..9b5855768 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -353,7 +353,10 @@ from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE from .pornhd import PornHdIE -from .pornhub import PornHubIE +from .pornhub import ( + PornHubIE, + PornHubPlaylistIE, +) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE from .promptfile import PromptFileIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index fb2032832..913995044 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor): 'formats': formats, 'age_limit': 18, } + + +class PornHubPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.pornhub.com/playlist/6201671', + 'info_dict': { + 'id': '6201671', + 'title': 'P0p4', + }, + 'playlist_mincount': 35, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') + for video_url in 
set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage)) + ] + + playlist = self._parse_json( + self._search_regex( + r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), + playlist_id) + + return self.playlist_result( + entries, playlist_id, playlist.get('title'), playlist.get('description')) From 8fc642eb5b69f25d0488509c4c4f9d8bfe2b7225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Feb 2015 22:15:49 +0600 Subject: [PATCH 64/67] [pornhub] Fix uploader regex --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 913995044..3a27e3789 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor): video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) if thumbnail: From 6140baf4e1aa7015ac6b43e93236ac6a5813569e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 19 Feb 2015 18:17:31 +0100 Subject: [PATCH 65/67] [nationalgeographic] Add extractor (closes #4960) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nationalgeographic.py | 38 ++++++++++++++++++++++ youtube_dl/extractor/theplatform.py | 4 ++- 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/nationalgeographic.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9b5855768..f225ac654 100644 --- 
a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -285,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .myvidster import MyVidsterIE +from .nationalgeographic import NationalGeographicIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py new file mode 100644 index 000000000..c18640c5a --- /dev/null +++ b/youtube_dl/extractor/nationalgeographic.py @@ -0,0 +1,38 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + smuggle_url, + url_basename, +) + + +class NationalGeographicIE(InfoExtractor): + _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?' + + _TEST = { + 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', + 'info_dict': { + 'id': '4DmDACA6Qtk_', + 'ext': 'flv', + 'title': 'Mating Crabs Busted by Sharks', + 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', + }, + 'add_ie': ['ThePlatform'], + } + + def _real_extract(self, url): + name = url_basename(url) + + webpage = self._download_webpage(url, name) + feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url') + guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid') + + feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name) + content = feed.find('.//{http://search.yahoo.com/mrss/}content') + theplatform_id = url_basename(content.attrib.get('url')) + + return self.url_result(smuggle_url( + 'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id, + # For some reason, the normal links don't work and we must force the use of f4m + {'force_smil_url': True})) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 1579822f2..f7b34bd26 100644 --- 
a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor): if not provider_id: provider_id = 'dJ5BDC' - if mobj.group('config'): + if smuggled_data.get('force_smil_url', False): + smil_url = url + elif mobj.group('config'): config_url = url + '&form=json' config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') From a21420389edf665bfeb9610888d2d39dae374945 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Feb 2015 19:28:17 +0100 Subject: [PATCH 66/67] release 2015.02.19.3 --- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 607e958f6..f6ba28e7a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -68,6 +68,7 @@ - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **CBS** - **CBSNews**: CBS News + - **CBSSports** - **CeskaTelevize** - **channel9**: Channel 9 - **Chilloutzone** @@ -264,6 +265,7 @@ - **myvideo** - **MyVidster** - **n-tv.de** + - **NationalGeographic** - **Naver** - **NBA** - **NBC** @@ -321,6 +323,7 @@ - **podomatic** - **PornHd** - **PornHub** + - **PornHubPlaylist** - **Pornotube** - **PornoXO** - **PromptFile** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 131dd571b..537e8cf60 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.19.2' +__version__ = '2015.02.19.3' From dd0a58f5f028980ba76450998b218ea7a4920420 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 Feb 2015 01:19:38 +0600 Subject: [PATCH 67/67] [blinkx] Fix extraction --- youtube_dl/extractor/blinkx.py | 49 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 26 deletions(-) diff --git 
a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 3e461e715..3b8eabe8f 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -1,40 +1,35 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor -from ..utils import remove_start +from ..utils import ( + remove_start, + int_or_none, +) class BlinkxIE(InfoExtractor): - _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' + _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' IE_NAME = 'blinkx' _TEST = { - 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', - 'md5': '2e9a07364af40163a908edbf10bb2492', + 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', + 'md5': '337cf7a344663ec79bf93a526a2e06c7', 'info_dict': { - 'id': '8aQUy7GV', + 'id': 'Da0Gw3xc', 'ext': 'mp4', - 'title': 'Police Car Rolls Away', - 'uploader': 'stupidvideos.com', - 'upload_date': '20131215', - 'timestamp': 1387068000, - 'description': 'A police car gently rolls away from a fight. 
Maybe it felt weird being around a confrontation and just had to get out of there!', - 'duration': 14.886, - 'thumbnails': [{ - 'width': 100, - 'height': 76, - 'resolution': '100x76', - 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', - }], + 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', + 'uploader': 'IGN News', + 'upload_date': '20150217', + 'timestamp': 1424215740, + 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', + 'duration': 47.743333, }, } - def _real_extract(self, rl): - m = re.match(self._VALID_URL, rl) - video_id = m.group('id') + def _real_extract(self, url): + video_id = self._match_id(url) display_id = video_id[:8] api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + @@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor): elif m['type'] in ('flv', 'mp4'): vcodec = remove_start(m['vcodec'], 'ff') acodec = remove_start(m['acodec'], 'ff') - tbr = (int(m['vbr']) + int(m['abr'])) // 1000 + vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) + abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) + tbr = vbr + abr if vbr and abr else None format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) formats.append({ 'format_id': format_id, 'url': m['link'], 'vcodec': vcodec, 'acodec': acodec, - 'abr': int(m['abr']) // 1000, - 'vbr': int(m['vbr']) // 1000, + 'abr': abr, + 'vbr': vbr, 'tbr': tbr, - 'width': int(m['w']), - 'height': int(m['h']), + 'width': int_or_none(m.get('w')), + 'height': int_or_none(m.get('h')), }) self._sort_formats(formats)