diff --git a/Makefile b/Makefile index 0636fc4cb..573c82685 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin diff --git a/README.md b/README.md index d68896202..a2c148311 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like. --playlist-reverse Download playlist videos in reverse order --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize + --hls-prefer-native (experimental) Use the native HLS + downloader instead of ffmpeg. --external-downloader COMMAND (experimental) Use the specified external downloader. Currently supports aria2c,curl,wget @@ -513,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c ### ERROR: no fmt_url_map or conn information found in video info -youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. +YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. ### ERROR: unable to download video ### -youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. +YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. + +### ExtractorError: Could not find JS function u'OF' + +In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. ### SyntaxError: Non-ASCII character ### diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4bb68fdc5..f6ba28e7a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -68,6 +68,7 @@ - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **CBS** - **CBSNews**: CBS News + - **CBSSports** - **CeskaTelevize** - **channel9**: Channel 9 - **Chilloutzone** @@ -121,6 +122,7 @@ - **EllenTV** - **EllenTV:clips** - **ElPais**: El País + - **Embedly** - **EMPFlix** - **Engadget** - **Eporner** @@ -190,6 +192,7 @@ - **ign.com** - **imdb**: Internet Movie Database trailers - **imdb:list**: Internet Movie Database lists + - **Imgur** - **Ina** - **InfoQ** - **Instagram** @@ -262,6 +265,7 @@ - **myvideo** - **MyVidster** - **n-tv.de** + - **NationalGeographic** - **Naver** - **NBA** - **NBC** @@ -319,6 +323,7 @@ - **podomatic** - **PornHd** - **PornHub** + - **PornHubPlaylist** - **Pornotube** - **PornoXO** - **PromptFile** @@ -352,6 +357,7 @@ - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **RUTV**: RUTV.RU + - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos - **savefrom.net** - **SBS**: sbs.com.au diff --git a/test/helper.py b/test/helper.py index 651ef99b9..12afdf184 100644 --- a/test/helper.py +++ b/test/helper.py @@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict): self.assertTrue( got.startswith(start_str), 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) + elif isinstance(expected, compat_str) and expected.startswith('contains:'): + got = got_dict.get(info_field) + contains_str = expected[len('contains:'):] + self.assertTrue( + isinstance(got, compat_str), + 'Expected a %s object, but got %s for field %s' % ( + compat_str.__name__, type(got).__name__, info_field)) + self.assertTrue( + contains_str in got, + 'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) elif isinstance(expected, type): got = got_dict.get(info_field) self.assertTrue(isinstance(got, expected), @@ -163,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict): info_dict_str += ''.join( ' %s: %s,\n' % (_repr(k), _repr(v)) for k, v in test_info_dict.items() if k not in missing_keys) - info_dict_str += '\n' + + if info_dict_str: + info_dict_str += '\n' info_dict_str += ''.join( ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) for k in missing_keys) write_string( - '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) + '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) self.assertFalse( missing_keys, 'Missing keys in test definition: %s' % ( diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index b91b8c492..fc73e5dc2 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('f'), -11) def test_comments(self): + 'Skipping: Not yet fully implemented' + return jsi = JSInterpreter(''' function x() { var x = /* 1 + */ 2; @@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('x'), 52) + jsi = JSInterpreter(''' + function f() { + var x = "/*"; + var y = 1 /* comment */ + 2; + return y; + } + ''') + self.assertEqual(jsi.call_function('f'), 3) + def test_precedence(self): jsi = JSInterpreter(''' function x() { diff --git a/test/test_utils.py b/test/test_utils.py index 1c29d0889..c7373af1e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase): "playlist":[{"controls":{"all":null}}] }''') + inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' + json_code = js_to_json(inp) + self.assertEqual(json.loads(json_code), json.loads(inp)) + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 09696e19a..060864434 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -64,6 +64,12 @@ _TESTS = [ 'js', '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', + 'js', + '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', + '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', ) ] diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ea2435e0a..88809783b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -199,18 +199,25 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * status: One of "downloading" and "finished". + * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. - If status is one of "downloading" or "finished", the + If status is one of "downloading", or "finished", the following properties may also be present: * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown - * tmpfilename: The filename we're currently writing to + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown + * fragment_index: The counter of the currently + downloaded video fragment. + * fragment_count: The number of fragments (= individual + files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. @@ -954,30 +961,9 @@ class YoutubeDL(object): return res def _calc_cookies(self, info_dict): - class _PseudoRequest(object): - def __init__(self, url): - self.url = url - self.headers = {} - self.unverifiable = False - - def add_unredirected_header(self, k, v): - self.headers[k] = v - - def get_full_url(self): - return self.url - - def is_unverifiable(self): - return self.unverifiable - - def has_header(self, h): - return h in self.headers - - def get_header(self, h, default=None): - return self.headers.get(h, default) - - pr = _PseudoRequest(info_dict['url']) + pr = compat_urllib_request.Request(info_dict['url']) self.cookiejar.add_cookie_header(pr) - return pr.headers.get('Cookie') + return pr.get_header('Cookie') def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -1301,7 +1287,7 @@ class YoutubeDL(object): downloaded = [] success = True merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) - if not merger.available(): + if not merger.available: postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' @@ -1548,29 +1534,18 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - def line(format, idlen=20): - return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( - format['format_id'], - format['ext'], - self.format_resolution(format), - self._format_note(format), - )) - formats = info_dict.get('formats', [info_dict]) - idlen = max(len('format code'), - max(len(f['format_id']) for f in formats)) - formats_s = [ - line(f, idlen) for f in formats + table = [ + [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] + for f in formats if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: - formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' + table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - header_line = line({ - 'format_id': 'format code', 'ext': 'extension', - 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) + header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( - '[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + '[info] Available formats for %s:\n%s' % + (info_dict['id'], render_table(header_line, table))) def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 7bb3a948d..45e55b99c 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import os import re @@ -54,6 +54,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params + self.add_progress_hook(self.report_progress) @staticmethod def format_seconds(seconds): @@ -226,42 +227,64 @@ class FileDownloader(object): self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) self.to_console_title('youtube-dl ' + msg) - def report_progress(self, percent, data_len_str, speed, eta): - """Report download progress.""" - if self.params.get('noprogress', False): + def report_progress(self, s): + if s['status'] == 'finished': + if self.params.get('noprogress', False): + self.to_screen('[download] Download completed') + else: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' + else: + msg_template = '100%% of %(_total_bytes_str)s' + self._report_progress_status( + msg_template % s, is_last_line=True) + + if self.params.get('noprogress'): return - if eta is not None: - eta_str = self.format_eta(eta) - else: - eta_str = 'Unknown ETA' - if percent is not None: - percent_str = self.format_percent(percent) - else: - percent_str = 'Unknown %' - speed_str = self.format_speed(speed) - msg = ('%s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str)) - self._report_progress_status(msg) - - def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): - if self.params.get('noprogress', False): + if s['status'] != 'downloading': return - downloaded_str = format_bytes(downloaded_data_len) - speed_str = self.format_speed(speed) - elapsed_str = FileDownloader.format_seconds(elapsed) - msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) - self._report_progress_status(msg) - def report_finish(self, data_len_str, tot_time): - """Report download finished.""" - if self.params.get('noprogress', False): - self.to_screen('[download] Download completed') + if s.get('eta') is not None: + s['_eta_str'] = self.format_eta(s['eta']) else: - self._report_progress_status( - ('100%% of %s in %s' % - (data_len_str, self.format_seconds(tot_time))), - is_last_line=True) + s['_eta_str'] = 'Unknown ETA' + + if s.get('total_bytes') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) + elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) + else: + if s.get('downloaded_bytes') == 0: + s['_percent_str'] = self.format_percent(0) + else: + s['_percent_str'] = 'Unknown %' + + if s.get('speed') is not None: + s['_speed_str'] = self.format_speed(s['speed']) + else: + s['_speed_str'] = 'Unknown speed' + + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' + elif s.get('total_bytes_estimate') is not None: + s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) + msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' + else: + if s.get('downloaded_bytes') is not None: + s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) + if s.get('elapsed'): + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' + else: + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' + else: + msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + + self._report_progress_status(msg_template % s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index ff031d2e0..51c41c704 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -75,7 +75,7 @@ class ExternalFD(FileDownloader): class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-o', tmpfilename] + cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 0e7a1c200..b40ebfa50 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import base64 import io @@ -15,7 +15,6 @@ from ..compat import ( from ..utils import ( struct_pack, struct_unpack, - format_bytes, encodeFilename, sanitize_open, xpath_text, @@ -252,17 +251,6 @@ class F4mFD(FileDownloader): requested_bitrate = info_dict.get('tbr') self.to_screen('[download] Downloading f4m manifest') manifest = self.ydl.urlopen(man_url).read() - self.report_destination(filename) - http_dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': True, - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit', None), - 'test': self.params.get('test', False), - } - ) doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) @@ -298,39 +286,65 @@ class F4mFD(FileDownloader): # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + self.report_destination(filename) + http_dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), + 'test': self.params.get('test', False), + } + ) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + write_flv_header(dest_stream) write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook state = { + 'status': 'downloading', 'downloaded_bytes': 0, - 'frag_counter': 0, + 'frag_index': 0, + 'frag_count': total_frags, + 'filename': filename, + 'tmpfilename': tmpfilename, } start = time.time() - def frag_progress_hook(status): - frag_total_bytes = status.get('total_bytes', 0) - estimated_size = (state['downloaded_bytes'] + - (total_frags - state['frag_counter']) * frag_total_bytes) - if status['status'] == 'finished': + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + frag_total_bytes = s.get('total_bytes', 0) + if s['status'] == 'finished': state['downloaded_bytes'] += frag_total_bytes - state['frag_counter'] += 1 - progress = self.calc_percent(state['frag_counter'], total_frags) - byte_counter = state['downloaded_bytes'] + state['frag_index'] += 1 + + estimated_size = ( + (state['downloaded_bytes'] + frag_total_bytes) + / (state['frag_index'] + 1) * total_frags) + time_now = time.time() + state['total_bytes_estimate'] = estimated_size + state['elapsed'] = time_now - start + + if s['status'] == 'finished': + progress = self.calc_percent(state['frag_index'], total_frags) else: - frag_downloaded_bytes = status['downloaded_bytes'] - byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes + frag_downloaded_bytes = s['downloaded_bytes'] frag_progress = self.calc_percent(frag_downloaded_bytes, frag_total_bytes) - progress = self.calc_percent(state['frag_counter'], total_frags) + progress = self.calc_percent(state['frag_index'], total_frags) progress += frag_progress / float(total_frags) - eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) - self.report_progress(progress, format_bytes(estimated_size), - status.get('speed'), eta) + state['eta'] = self.calc_eta( + start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) + state['speed'] = s.get('speed') + self._hook_progress(state) + http_dl.add_progress_hook(frag_progress_hook) frags_filenames = [] @@ -354,8 +368,8 @@ class F4mFD(FileDownloader): frags_filenames.append(frag_filename) dest_stream.close() - self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) + elapsed = time.time() - start self.try_rename(tmpfilename, filename) for frag_file in frags_filenames: os.remove(frag_file) @@ -366,6 +380,7 @@ class F4mFD(FileDownloader): 'total_bytes': fsize, 'filename': filename, 'status': 'finished', + 'elapsed': elapsed, }) return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 49170cf9d..2e3dac825 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -import os -import time - -from socket import error as SocketError import errno +import os +import socket +import time from .common import FileDownloader from ..compat import ( @@ -15,7 +14,6 @@ from ..utils import ( ContentTooShortError, encodeFilename, sanitize_open, - format_bytes, ) @@ -102,7 +100,7 @@ class HttpFD(FileDownloader): resume_len = 0 open_mode = 'wb' break - except SocketError as e: + except socket.error as e: if e.errno != errno.ECONNRESET: # Connection reset is no problem, just retry raise @@ -137,7 +135,6 @@ class HttpFD(FileDownloader): self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) return False - data_len_str = format_bytes(data_len) byte_counter = 0 + resume_len block_size = self.params.get('buffersize', 1024) start = time.time() @@ -196,20 +193,19 @@ class HttpFD(FileDownloader): # Progress message speed = self.calc_speed(start, now, byte_counter - resume_len) if data_len is None: - eta = percent = None + eta = None else: - percent = self.calc_percent(byte_counter, data_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent, data_len_str, speed, eta) self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': byte_counter, 'total_bytes': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, 'speed': speed, + 'elapsed': now - start, }) if is_test and byte_counter == data_len: @@ -221,7 +217,13 @@ class HttpFD(FileDownloader): return False if tmpfilename != '-': stream.close() - self.report_finish(data_len_str, (time.time() - start)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'status': 'error', + }) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) @@ -235,6 +237,7 @@ class HttpFD(FileDownloader): 'total_bytes': byte_counter, 'filename': filename, 'status': 'finished', + 'elapsed': time.time() - start, }) return True diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index f7eeb6f43..0a52c34c7 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -11,7 +11,6 @@ from ..compat import compat_str from ..utils import ( check_executable, encodeFilename, - format_bytes, get_exe_version, ) @@ -51,23 +50,23 @@ class RtmpFD(FileDownloader): if not resume_percent: resume_percent = percent resume_downloaded_data_len = downloaded_data_len - eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) - speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) data_len = None if percent > 0: data_len = int(downloaded_data_len * 100 / percent) - data_len_str = '~' + format_bytes(data_len) - self.report_progress(percent, data_len_str, speed, eta) - cursor_in_new_line = False self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': downloaded_data_len, - 'total_bytes': data_len, + 'total_bytes_estimate': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False else: # no percent for live streams mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) @@ -75,15 +74,15 @@ class RtmpFD(FileDownloader): downloaded_data_len = int(float(mobj.group(1)) * 1024) time_now = time.time() speed = self.calc_speed(start, time_now, downloaded_data_len) - self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) - cursor_in_new_line = False self._hook_progress({ 'downloaded_bytes': downloaded_data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a8e67eaa2..bbe00cb46 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -58,6 +58,7 @@ from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .cbs import CBSIE from .cbsnews import CBSNewsIE +from .cbssports import CBSSportsIE from .ccc import CCCIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE @@ -121,6 +122,7 @@ from .ellentv import ( EllenTVClipsIE, ) from .elpais import ElPaisIE +from .embedly import EmbedlyIE from .empflix import EMPFlixIE from .engadget import EngadgetIE from .eporner import EpornerIE @@ -204,6 +206,7 @@ from .imdb import ( ImdbIE, ImdbListIE ) +from .imgur import ImgurIE from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE @@ -283,6 +286,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .myvidster import MyVidsterIE +from .nationalgeographic import NationalGeographicIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( @@ -351,7 +355,10 @@ from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE from .pornhd import PornHdIE -from .pornhub import PornHubIE +from .pornhub import ( + PornHubIE, + PornHubPlaylistIE, +) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE from .promptfile import PromptFileIE @@ -387,6 +394,7 @@ from .rutube import ( RutubePersonIE, ) from .rutv import RUTVIE +from .sandia import SandiaIE from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 502a9c25a..34b8b0115 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor): }, ], 'info_dict': { + 'id': 'rQxZvXQ4ROaSOqq-or2Mow', 'title': 'Rick and Morty - Pilot', 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " } @@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor): } ], 'info_dict': { + 'id': '-t8CamQlQ2aYZ49ItZCFog', 'title': 'American Dad - Putting Francine Out of Business', 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' }, diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 287f71e07..43e82847f 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -14,6 +14,9 @@ class AppleTrailersIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P[^/]+)/(?P[^/]+)' _TEST = { "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + 'info_dict': { + 'id': 'manofsteel', + }, "playlist": [ { "md5": "d97a8e575432dbcb81b7c3acb741f8a8", diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 490cc961a..869294967 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -109,7 +109,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^?#]+)|/?(?:$|[?#]))' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -133,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor): ], 'info_dict': { 'title': 'Jazz Format Mixtape vol.1', + 'id': 'jazz-format-mixtape-vol-1', + 'uploader_id': 'blazo', }, 'params': { 'playlistend': 2 }, - 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + 'skip': 'Bandcamp imposes download limits.' }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { 'title': 'Hierophany of the Open Grave', + 'uploader_id': 'nightbringer', + 'id': 'hierophany-of-the-open-grave', }, 'playlist_mincount': 9, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'title': 'Loom', + 'id': 'dotscale', + 'uploader_id': 'dotscale', }, 'playlist_mincount': 7, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('subdomain') - title = mobj.group('title') - display_id = title or playlist_id - webpage = self._download_webpage(url, display_id) + uploader_id = mobj.group('subdomain') + album_id = mobj.group('album_id') + playlist_id = album_id or uploader_id + webpage = self._download_webpage(url, playlist_id) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) if not tracks_paths: raise ExtractorError('The page doesn\'t contain any tracks') @@ -168,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor): r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) return { '_type': 'playlist', + 'uploader_id': uploader_id, 'id': playlist_id, - 'display_id': display_id, 'title': title, 'entries': entries, } diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 3e461e715..3b8eabe8f 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -1,40 +1,35 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor -from ..utils import remove_start +from ..utils import ( + remove_start, + int_or_none, +) class BlinkxIE(InfoExtractor): - _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' + _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' IE_NAME = 'blinkx' _TEST = { - 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', - 'md5': '2e9a07364af40163a908edbf10bb2492', + 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', + 'md5': '337cf7a344663ec79bf93a526a2e06c7', 'info_dict': { - 'id': '8aQUy7GV', + 'id': 'Da0Gw3xc', 'ext': 'mp4', - 'title': 'Police Car Rolls Away', - 'uploader': 'stupidvideos.com', - 'upload_date': '20131215', - 'timestamp': 1387068000, - 'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', - 'duration': 14.886, - 'thumbnails': [{ - 'width': 100, - 'height': 76, - 'resolution': '100x76', - 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', - }], + 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', + 'uploader': 'IGN News', + 'upload_date': '20150217', + 'timestamp': 1424215740, + 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', + 'duration': 47.743333, }, } - def _real_extract(self, rl): - m = re.match(self._VALID_URL, rl) - video_id = m.group('id') + def _real_extract(self, url): + video_id = self._match_id(url) display_id = video_id[:8] api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + @@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor): elif m['type'] in ('flv', 'mp4'): vcodec = remove_start(m['vcodec'], 'ff') acodec = remove_start(m['acodec'], 'ff') - tbr = (int(m['vbr']) + int(m['abr'])) // 1000 + vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) + abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) + tbr = vbr + abr if vbr and abr else None format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) formats.append({ 'format_id': format_id, 'url': m['link'], 'vcodec': vcodec, 'acodec': acodec, - 'abr': int(m['abr']) // 1000, - 'vbr': int(m['vbr']) // 1000, + 'abr': abr, + 'vbr': vbr, 'tbr': tbr, - 'width': int(m['w']), - 'height': int(m['h']), + 'width': int_or_none(m.get('w')), + 'height': int_or_none(m.get('h')), }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index ea0969d4d..0733bece7 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor): 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', 'info_dict': { 'title': 'Sealife', + 'id': '3550319591001', }, 'playlist_mincount': 7, }, @@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor): playlist_info = json_data['videoList'] videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] - return self.playlist_result(videos, playlist_id=playlist_info['id'], + return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py index a5d2af174..df503ecc0 100644 --- a/youtube_dl/extractor/buzzfeed.py +++ b/youtube_dl/extractor/buzzfeed.py @@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor): 'skip_download': True, # Got enough YouTube download tests }, 'info_dict': { + 'id': 'look-at-this-cute-dog-omg', 'description': 're:Munchkin the Teddy Bear is back ?!', 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', }, @@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20141124', 'uploader_id': 'CindysMunchkin', - 'description': 're:© 2014 Munchkin the Shih Tzu', - 'uploader': 'Munchkin the Shih Tzu', + 'description': 're:© 2014 Munchkin the', + 'uploader': 're:^Munchkin the', 'title': 're:Munchkin the Teddy Bear gets her exercise', }, }] diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index e43756ec6..1ceb9d8d9 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -39,8 +37,7 @@ class CBSIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) real_id = self._search_regex( r"video\.settings\.pid\s*=\s*'([^']+)';", diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py new file mode 100644 index 000000000..ae47e74cc --- /dev/null +++ b/youtube_dl/extractor/cbssports.py @@ -0,0 +1,30 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CBSSportsIE(InfoExtractor): + _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' + + _TEST = { + 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', + 'info_dict': { + 'id': '_d5_GbO8p1sT', + 'ext': 'flv', + 'title': 'US Open flashbacks: 1990s', + 'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + section = mobj.group('section') + video_id = mobj.group('id') + all_videos = self._download_json( + 'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, + video_id) + # The json file contains the info of all the videos in the section + video_info = next(v for v in all_videos if v['pcid'] == video_id) + return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e74b7bf25..08b8ad37c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -27,7 +27,6 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, - HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -753,9 +752,7 @@ class InfoExtractor(object): def _is_valid_url(self, url, video_id, item='video'): try: - self._request_webpage( - HEADRequest(url), video_id, - 'Checking %s URL' % item) + self._request_webpage(url, video_id, 'Checking %s URL' % item) return True except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): @@ -841,6 +838,7 @@ class InfoExtractor(object): note='Downloading m3u8 information', errnote='Failed to download m3u8 information') last_info = None + last_media = None kv_rex = re.compile( r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') for line in m3u8_doc.splitlines(): @@ -851,6 +849,13 @@ class InfoExtractor(object): if v.startswith('"'): v = v[1:-1] last_info[m.group('key')] = v + elif line.startswith('#EXT-X-MEDIA:'): + last_media = {} + for m in kv_rex.finditer(line): + v = m.group('val') + if v.startswith('"'): + v = v[1:-1] + last_media[m.group('key')] = v elif line.startswith('#') or not line.strip(): continue else: @@ -879,6 +884,9 @@ class InfoExtractor(object): width_str, height_str = resolution.split('x') f['width'] = int(width_str) f['height'] = int(height_str) + if last_media is not None: + f['m3u8_media'] = last_media + last_media = None formats.append(f) last_info = {} self._sort_formats(formats) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index cf5841a7c..b2dbf4a92 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', 'info_dict': { 'title': 'SPORT', + 'id': 'xv4bw_nqtv_sport', }, 'playlist_mincount': 20, }] diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py new file mode 100644 index 000000000..1cdb11e34 --- /dev/null +++ b/youtube_dl/extractor/embedly.py @@ -0,0 +1,16 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class EmbedlyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' + _TESTS = [{ + 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 5b24b921c..157094e8c 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor): IE_NAME = '5min' _VALID_URL = r'''(?x) (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| + https?://(?:(?:massively|www)\.)?joystiq\.com/video/| 5min:) (?P<id>\d+) ''' diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c4ba25a96..8dce96a64 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -473,6 +473,7 @@ class GenericIE(InfoExtractor): { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'info_dict': { + 'id': '1986', 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', }, 'playlist_mincount': 2, @@ -531,7 +532,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'Mrj4DVp2zeA', 'ext': 'mp4', - 'upload_date': '20150204', + 'upload_date': '20150212', 'uploader': 'The National Archives UK', 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', 'uploader_id': 'NationalArchives08', diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 3db668cd0..3aade9e74 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -34,6 +34,9 @@ class IGNIE(InfoExtractor): }, { 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', + 'info_dict': { + 'id': '100-little-things-in-gta-5-that-will-blow-your-mind', + }, 'playlist': [ { 'info_dict': { diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py new file mode 100644 index 000000000..b16c7aed0 --- /dev/null +++ b/youtube_dl/extractor/imgur.py @@ -0,0 +1,97 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + mimetype2ext, + ExtractorError, +) + + +class ImgurIE(InfoExtractor): + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' + + _TESTS = [{ + 'url': 'https://i.imgur.com/A61SaA1.gifv', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', + }, + }, { + 'url': 'https://imgur.com/A61SaA1', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + width = int_or_none(self._search_regex( + r'<param name="width" value="([0-9]+)"', + webpage, 'width', fatal=False)) + height = int_or_none(self._search_regex( + r'<param name="height" value="([0-9]+)"', + webpage, 'height', fatal=False)) + + video_elements = self._search_regex( + r'(?s)<div class="video-elements">(.*?)</div>', + webpage, 'video elements', default=None) + if not video_elements: + raise ExtractorError( + 'No sources found for video %s. Maybe an image?' % video_id, + expected=True) + + formats = [] + for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): + formats.append({ + 'format_id': m.group('type').partition('/')[2], + 'url': self._proto_relative_url(m.group('src')), + 'ext': mimetype2ext(m.group('type')), + 'acodec': 'none', + 'width': width, + 'height': height, + 'http_headers': { + 'User-Agent': 'youtube-dl (like wget)', + }, + }) + + gif_json = self._search_regex( + r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', + webpage, 'GIF code', fatal=False) + if gif_json: + gifd = self._parse_json( + gif_json, video_id, transform_source=js_to_json) + formats.append({ + 'format_id': 'gif', + 'preference': -10, + 'width': width, + 'height': height, + 'ext': 'gif', + 'acodec': 'none', + 'vcodec': 'gif', + 'container': 'gif', + 'url': self._proto_relative_url(gifd['gifUrl']), + 'filesize': gifd.get('size'), + 'http_headers': { + 'User-Agent': 'youtube-dl (like wget)', + }, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'title': self._og_search_title(webpage), + } diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5247c6f58..3642089f7 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor): 'url': 'http://new.livestream.com/tedx/cityenglish', 'info_dict': { 'title': 'TEDCity2.0 (English)', + 'id': '2245590', }, 'playlist_mincount': 4, }, { @@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor): if is_relevant(video_data, video_id)] if video_id is None: # This is an event page: - return self.playlist_result(videos, info['id'], info['full_name']) + return self.playlist_result( + videos, '%s' % info['id'], info['full_name']) else: if not videos: raise ExtractorError('Cannot find video %s' % video_id) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py new file mode 100644 index 000000000..c18640c5a --- /dev/null +++ b/youtube_dl/extractor/nationalgeographic.py @@ -0,0 +1,38 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + smuggle_url, + url_basename, +) + + +class NationalGeographicIE(InfoExtractor): + _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?' + + _TEST = { + 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', + 'info_dict': { + 'id': '4DmDACA6Qtk_', + 'ext': 'flv', + 'title': 'Mating Crabs Busted by Sharks', + 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', + }, + 'add_ie': ['ThePlatform'], + } + + def _real_extract(self, url): + name = url_basename(url) + + webpage = self._download_webpage(url, name) + feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url') + guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid') + + feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name) + content = feed.find('.//{http://search.yahoo.com/mrss/}content') + theplatform_id = url_basename(content.attrib.get('url')) + + return self.url_result(smuggle_url( + 'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id, + # For some reason, the normal links don't work and we must force the use of f4m + {'force_smil_url': True})) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 89a2845fe..3645d3033 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -18,13 +18,13 @@ class NBCIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', + 'url': 'http://www.nbc.com/the-tonight-show/segments/112966', # md5 checksum is not stable 'info_dict': { - 'id': 'bTmnLCvIbaaH', + 'id': 'c9xnCo0YPOPH', 'ext': 'flv', - 'title': 'I Am a Firefighter', - 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', + 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', + 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', }, }, { diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py index 93567d1e3..bc17e20aa 100644 --- a/youtube_dl/extractor/netzkino.py +++ b/youtube_dl/extractor/netzkino.py @@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor): 'timestamp': 1344858571, 'age_limit': 12, }, + 'params': { + 'skip_download': 'Download only works from Germany', + } } def _real_extract(self, url): diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 5429592a7..f179ea200 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -1,9 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( js_to_json, @@ -11,7 +8,7 @@ from ..utils import ( class PatreonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)' + _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' _TESTS = [ { 'url': 'http://www.patreon.com/creation?hid=743933', @@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor): 'thumbnail': 're:^https?://.*$', }, }, + { + 'url': 'https://www.patreon.com/creation?hid=1682498', + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'uploader': 'TraciJHines', + 'thumbnail': 're:^https?://.*$', + 'upload_date': '20150211', + 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', + 'uploader_id': 'TraciJHines', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + } + } ] # Currently Patreon exposes download URL via hidden CSS, so login is not @@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor): ''' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage).strip() attach_fn = self._html_search_regex( r'<div class="attach"><a target="_blank" href="([^"]+)">', webpage, 'attachment URL', default=None) + embed = self._html_search_regex( + r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', + webpage, 'embedded URL', default=None) + if attach_fn is not None: video_url = 'http://www.patreon.com' + attach_fn thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( r'<strong>(.*?)</strong> is creating', webpage, 'uploader') + elif embed is not None: + return self.url_result(embed) else: - playlist_js = self._search_regex( + playlist = self._parse_json(self._search_regex( r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', - webpage, 'playlist JSON') - playlist_json = js_to_json(playlist_js) - playlist = json.loads(playlist_json) + webpage, 'playlist JSON'), + video_id, transform_source=js_to_json) data = playlist[0] video_url = self._proto_relative_url(data['mp3']) thumbnail = self._proto_relative_url(data.get('cover')) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index fb2032832..3a27e3789 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor): video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) if thumbnail: @@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor): 'formats': formats, 'age_limit': 18, } + + +class PornHubPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.pornhub.com/playlist/6201671', + 'info_dict': { + 'id': '6201671', + 'title': 'P0p4', + }, + 'playlist_mincount': 35, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') + for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage)) + ] + + playlist = self._parse_json( + self._search_regex( + r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), + playlist_id) + + return self.playlist_result( + entries, playlist_id, playlist.get('title'), playlist.get('description')) diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py index f95bc9454..aa5f6f8ad 100644 --- a/youtube_dl/extractor/radiode.py +++ b/youtube_dl/extractor/radiode.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import json - from .common import InfoExtractor @@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor): _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' _TEST = { 'url': 'http://ndr2.radio.de/', - 'md5': '3b4cdd011bc59174596b6145cda474a4', 'info_dict': { 'id': 'ndr2', 'ext': 'mp3', 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:591c49c702db1a33751625ebfb67f273', 'thumbnail': 're:^https?://.*\.png', + 'is_live': True, }, 'params': { 'skip_download': True, @@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor): def _real_extract(self, url): radio_id = self._match_id(url) - webpage = self._download_webpage(url, radio_id) + jscode = self._search_regex( + r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", + webpage, 'broadcast') - broadcast = json.loads(self._search_regex( - r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', - webpage, 'broadcast')) - + broadcast = self._parse_json(jscode, radio_id) title = self._live_title(broadcast['name']) description = broadcast.get('description') or broadcast.get('shortDescription') - thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') + thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') formats = [{ 'url': stream['streamUrl'], diff --git a/youtube_dl/extractor/sandia.py b/youtube_dl/extractor/sandia.py new file mode 100644 index 000000000..9c88167f0 --- /dev/null +++ b/youtube_dl/extractor/sandia.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import json +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( + int_or_none, + js_to_json, + mimetype2ext, + unified_strdate, +) + + +class SandiaIE(InfoExtractor): + IE_DESC = 'Sandia National Laboratories' + _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)' + _TEST = { + 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d', + 'md5': '9422edc9b9a60151727e4b6d8bef393d', + 'info_dict': { + 'id': '24aace4429fc450fb5b38cdbf424a66e1d', + 'ext': 'mp4', + 'title': 'Xyce Software Training - Section 1', + 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}', + 'upload_date': '20120904', + 'duration': 7794, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4') + webpage = self._download_webpage(req, video_id) + + js_path = self._search_regex( + r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"', + webpage, 'JS code URL') + js_url = compat_urlparse.urljoin(url, js_path) + + js_code = self._download_webpage( + js_url, video_id, note='Downloading player') + + def extract_str(key, **args): + return self._search_regex( + r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key), + js_code, key, **args) + + def extract_data(key, **args): + data_json = extract_str(key, **args) + if data_json is None: + return data_json + return self._parse_json( + data_json, video_id, transform_source=js_to_json) + + formats = [] + for i in itertools.count(): + fd = extract_data('VideoUrls[%d]' % i, default=None) + if fd is None: + break + formats.append({ + 'format_id': '%s' % i, + 'format_note': fd['MimeType'].partition('/')[2], + 'ext': mimetype2ext(fd['MimeType']), + 'url': fd['Location'], + 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None, + }) + self._sort_formats(formats) + + slide_baseurl = compat_urlparse.urljoin( + url, extract_data('SlideBaseUrl')) + slide_template = slide_baseurl + re.sub( + r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate')) + slides = [] + last_slide_time = 0 + for i in itertools.count(1): + sd = extract_str('Slides[%d]' % i, default=None) + if sd is None: + break + timestamp = int_or_none(self._search_regex( + r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),', + sd, 'slide %s timestamp' % i, fatal=False)) + slides.append({ + 'url': slide_template % i, + 'duration': timestamp - last_slide_time, + }) + last_slide_time = timestamp + formats.append({ + 'format_id': 'slides', + 'protocol': 'slideshow', + 'url': json.dumps(slides), + 'preference': -10000, # Downloader not yet written + }) + self._sort_formats(formats) + + title = extract_data('Title') + description = extract_data('Description', fatal=False) + duration = int_or_none(extract_data( + 'Duration', fatal=False), scale=1000) + upload_date = unified_strdate(extract_data('AirDate', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'upload_date': upload_date, + 'duration': duration, + } diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py index 7d3c0e937..b5fa6f1da 100644 --- a/youtube_dl/extractor/sockshare.py +++ b/youtube_dl/extractor/sockshare.py @@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor): 'id': '437BE28B89D799D7', 'title': 'big_buck_bunny_720p_surround.avi', 'ext': 'avi', - 'thumbnail': 're:^http://.*\.jpg$', } } @@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor): ''', webpage, 'hash') fields = { - "hash": confirm_hash, + "hash": confirm_hash.encode('utf-8'), "confirm": "Continue as Free User" } @@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor): webpage, 'title', default=None) thumbnail = self._html_search_regex( r'<img\s+src="([^"]*)".+?name="bg"', - webpage, 'thumbnail') + webpage, 'thumbnail', default=None) formats = [{ 'format_id': 'sd', diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py index b65d8e03f..10239c906 100644 --- a/youtube_dl/extractor/theonion.py +++ b/youtube_dl/extractor/theonion.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError class TheOnionIE(InfoExtractor): - _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' + _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?' _TEST = { 'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', 'md5': '19eaa9a39cf9b9804d982e654dc791ee', @@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - article_id = mobj.group('article_id') - - webpage = self._download_webpage(url, article_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) video_id = self._search_regex( r'"videoId":\s(\d+),', webpage, 'video ID') @@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) - if not sources: - raise ExtractorError( - 'No sources found for video %s' % video_id, expected=True) - formats = [] for src, type_ in sources: if type_ == 'video/mp4': @@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor): }) elif type_ == 'application/x-mpegURL': formats.extend( - self._extract_m3u8_formats(src, video_id, preference=-1)) + self._extract_m3u8_formats(src, display_id, preference=-1)) else: self.report_warning( 'Encountered unexpected format: %s' % type_) - self._sort_formats(formats) return { 'id': video_id, + 'display_id': display_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 1579822f2..f7b34bd26 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor): if not provider_id: provider_id = 'dJ5BDC' - if mobj.group('config'): + if smuggled_data.get('force_smil_url', False): + smil_url = url + elif mobj.group('config'): config_url = url + '&form=json' config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 87290d002..4b0d8988d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -349,6 +349,13 @@ class TwitchStreamIE(TwitchBaseIE): % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), channel_id, 'mp4') + # prefer the 'source' stream, the others are limited to 30 fps + def _sort_source(f): + if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source': + return 1 + return 0 + formats = sorted(formats, key=_sort_source) + view_count = stream.get('viewers') timestamp = parse_iso8601(stream.get('created_at')) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index ebd2a3dca..d6a7eb203 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -49,15 +49,31 @@ class VideoLecturesNetIE(InfoExtractor): thumbnail = ( None if thumbnail_el is None else thumbnail_el.attrib.get('src')) - formats = [{ - 'url': v.attrib['src'], - 'width': int_or_none(v.attrib.get('width')), - 'height': int_or_none(v.attrib.get('height')), - 'filesize': int_or_none(v.attrib.get('size')), - 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, - 'ext': v.attrib.get('ext'), - } for v in switch.findall('./video') - if v.attrib.get('proto') == 'http'] + formats = [] + for v in switch.findall('./video'): + proto = v.attrib.get('proto') + if proto not in ['http', 'rtmp']: + continue + f = { + 'width': int_or_none(v.attrib.get('width')), + 'height': int_or_none(v.attrib.get('height')), + 'filesize': int_or_none(v.attrib.get('size')), + 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, + 'ext': v.attrib.get('ext'), + } + src = v.attrib['src'] + if proto == 'http': + if self._is_valid_url(src, video_id): + f['url'] = src + formats.append(f) + elif proto == 'rtmp': + f.update({ + 'url': v.attrib['streamer'], + 'play_path': src, + 'rtmp_real_time': True, + }) + formats.append(f) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 303e81447..78d287e0e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -18,6 +18,7 @@ from ..utils import ( InAdvancePagedList, int_or_none, RegexNotFoundError, + smuggle_url, std_headers, unsmuggle_url, urlencode_postdata, @@ -174,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) if password is None: - raise ExtractorError('This video is protected by a password, use the --video-password option') + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') data = compat_urllib_parse.urlencode({ 'password': password, @@ -267,8 +268,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None: + if data and '_video_password_verified' in data: + raise ExtractorError('video password verification failed!') self._verify_video_password(url, video_id, webpage) - return self._real_extract(url) + return self._real_extract( + smuggle_url(url, {'_video_password_verified': 'verified'})) else: raise ExtractorError('Unable to extract info section', cause=e) @@ -401,6 +405,7 @@ class VimeoChannelIE(InfoExtractor): _TESTS = [{ 'url': 'http://vimeo.com/channels/tributes', 'info_dict': { + 'id': 'tributes', 'title': 'Vimeo Tributes', }, 'playlist_mincount': 25, @@ -479,6 +484,7 @@ class VimeoUserIE(VimeoChannelIE): 'url': 'http://vimeo.com/nkistudio/videos', 'info_dict': { 'title': 'Nki', + 'id': 'nkistudio', }, 'playlist_mincount': 66, }] @@ -496,6 +502,7 @@ class VimeoAlbumIE(VimeoChannelIE): _TESTS = [{ 'url': 'http://vimeo.com/album/2632481', 'info_dict': { + 'id': '2632481', 'title': 'Staff Favorites: November 2013', }, 'playlist_mincount': 13, @@ -526,6 +533,7 @@ class VimeoGroupsIE(VimeoAlbumIE): _TESTS = [{ 'url': 'http://vimeo.com/groups/rolexawards', 'info_dict': { + 'id': 'rolexawards', 'title': 'Rolex Awards for Enterprise', }, 'playlist_mincount': 73, @@ -608,6 +616,7 @@ class VimeoLikesIE(InfoExtractor): 'url': 'https://vimeo.com/user755559/likes/', 'playlist_mincount': 293, "info_dict": { + 'id': 'user755559_likes', "description": "See all the videos urza likes", "title": 'Videos urza likes', }, diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 81e02a624..7dea8c59d 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor): _TEMPLATE_URL = 'https://vk.com/videos' _TEST = { 'url': 'http://vk.com/videos205387401', + 'info_dict': { + 'id': '205387401', + }, 'playlist_mincount': 4, } diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py index 396cf4e83..73077a312 100644 --- a/youtube_dl/extractor/webofstories.py +++ b/youtube_dl/extractor/webofstories.py @@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor): description = self._html_search_meta('description', webpage) thumbnail = self._og_search_thumbnail(webpage) - story_filename = self._search_regex( - r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') - speaker_id = self._search_regex( - r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') - story_id = self._search_regex( - r'\.storyId\((\d+)\)', webpage, 'story ID') - speaker_type = self._search_regex( - r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') - great_life = self._search_regex( - r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + embed_params = [s.strip(" \r\n\t'") for s in self._search_regex( + r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', + webpage, 'embed params').split(',')] + + ( + _, speaker_id, story_id, story_duration, + speaker_type, great_life, _thumbnail, _has_subtitles, + story_filename, _story_order) = embed_params + is_great_life_series = great_life == 'true' - duration = int_or_none(self._search_regex( - r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + duration = int_or_none(story_duration) # URL building, see: http://www.webofstories.com/scripts/player.js ms_prefix = '' diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index cbe3dc7be..2ddf29a69 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -18,8 +18,8 @@ class WSJIE(InfoExtractor): 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', 'ext': 'mp4', 'upload_date': '20150202', - 'uploader_id': 'bbright', - 'creator': 'bbright', + 'uploader_id': 'jdesai', + 'creator': 'jdesai', 'categories': list, # a long list 'duration': 90, 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index e8490b028..1644f53c8 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor): 'id': 'kVTUy_G222_', 'ext': 'mp4', 'title': 'strange erotica', - 'description': 'http://www.xtube.com an ET kind of thing', + 'description': 'contains:an ET kind of thing', 'uploader': 'greenshowers', 'duration': 450, 'age_limit': 18, diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index f8e7041a0..97dbac4cc 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -24,7 +24,6 @@ class YahooIE(InfoExtractor): _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - 'md5': '4962b075c08be8690a922ee026d05e69', 'info_dict': { 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', 'ext': 'mp4', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 35ef4c303..3d3d43491 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -541,26 +541,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) + download_note = ( + 'Downloading player %s' % player_url + if self._downloader.params.get('verbose') else + 'Downloading %s player %s' % (player_type, player_id) + ) if player_type == 'js': code = self._download_webpage( player_url, video_id, - note='Downloading %s player %s' % (player_type, player_id), + note=download_note, errnote='Download of %s failed' % player_url) res = self._parse_sig_js(code) elif player_type == 'swf': urlh = self._request_webpage( player_url, video_id, - note='Downloading %s player %s' % (player_type, player_id), + note=download_note, errnote='Download of %s failed' % player_url) code = urlh.read() res = self._parse_sig_swf(code) else: assert False, 'Invalid player type %r' % player_type - if cache_spec is None: - test_string = ''.join(map(compat_chr, range(len(example_sig)))) - cache_res = res(test_string) - cache_spec = [ord(c) for c in cache_res] + test_string = ''.join(map(compat_chr, range(len(example_sig)))) + cache_res = res(test_string) + cache_spec = [ord(c) for c in cache_res] self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) return res diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 453e2732c..0e0c7d90d 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -30,13 +30,10 @@ class JSInterpreter(object): def __init__(self, code, objects=None): if objects is None: objects = {} - self.code = self._remove_comments(code) + self.code = code self._functions = {} self._objects = objects - def _remove_comments(self, code): - return re.sub(r'(?s)/\*.*?\*/', '', code) - def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise ExtractorError('Recursion limit reached') diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 16babf6a5..3f2e6cf1d 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -34,10 +34,10 @@ class FFmpegPostProcessor(PostProcessor): self._determine_executables() def check_version(self): - if not self.available(): + if not self.available: raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') - required_version = '10-0' if self._uses_avconv() else '1.0' + required_version = '10-0' if self.basename == 'avconv' else '1.0' if is_outdated_version( self._versions[self.basename], required_version): warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( @@ -108,12 +108,10 @@ class FFmpegPostProcessor(PostProcessor): self.probe_basename = p break + @property def available(self): return self.basename is not None - def _uses_avconv(self): - return self.basename == 'avconv' - @property def executable(self): return self._paths[self.basename] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3eb6bc6d4..238b6556b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,8 +1560,8 @@ def js_to_json(code): return '"%s"' % v res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\")?)*"| - '(?:[^'\\]*(?:\\\\|\\')?)*'| + "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) @@ -1616,6 +1616,15 @@ def args_to_str(args): return ' '.join(shlex_quote(a) for a in args) +def mimetype2ext(mt): + _, _, res = mt.rpartition('/') + + return { + 'x-ms-wmv': 'wmv', + 'x-mp4-fragmented': 'mp4', + }.get(res, res) + + def urlhandle_detect_ext(url_handle): try: url_handle.headers @@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle): if e: return e - return getheader('Content-Type').split("/")[1] + return mimetype2ext(getheader('Content-Type')) def age_restricted(content_limit, age_limit): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index bbe0e53b5..537e8cf60 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.16.1' +__version__ = '2015.02.19.3'