From 0025da15cf310a58ee8f124e395bc1bd52fab5c8 Mon Sep 17 00:00:00 2001 From: alphapapa Date: Sat, 13 Jul 2013 16:42:16 -0500 Subject: [PATCH 001/264] Clarify that download rate is in bytes per second I found https://github.com/rg3/youtube-dl/commit/f918ec7ea29a37521d1fc22fb9f900283c5a2c49 but it is still not clear to anyone who hasn't read Issue #723 whether the limit is in bits or bytes. This is doubly confusing because 1) ISPs usually advertise speeds in bits per second, and 2) lowercase "k" and "m" are often used in correlation with bits rather than bytes. --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index db63d0adb..250cf62f8 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -208,7 +208,7 @@ def parseOpts(overrideArguments=None): help='language of the subtitles to download (optional) use IETF language tags like \'en\'') downloader.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 
50k or 44.6m)') downloader.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) downloader.add_option('--buffer-size', From d79a0e233a329e543797478a2eeb377e469c0f3f Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Tue, 17 Sep 2013 22:13:40 +0200 Subject: [PATCH 002/264] Extractor for websurg.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/websurg.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/websurg.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 761575062..19ded18f1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -109,6 +109,7 @@ from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE from .wat import WatIE +from .websurg import WeBSurgIE from .weibo import WeiboIE from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py new file mode 100644 index 000000000..953bc9831 --- /dev/null +++ b/youtube_dl/extractor/websurg.py @@ -0,0 +1,67 @@ +# coding: utf-8 + +import re + +from ..utils import ( + compat_urllib_request, + compat_urllib_parse +) + +from .common import InfoExtractor + +class WeBSurgIE(InfoExtractor): + IE_NAME = u'websurg.com' + _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)' + + _TEST = { + u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012', + u'file': u'vd01en4012.mp4', + u'params': { + u'skip_download': True, + } + } + + _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' + + def _real_extract(self, url): + + login_form = { + 'username': self._downloader.params['username'], + 'password': self._downloader.params['password'], + 'Submit': 1 + } + + request = compat_urllib_request.Request( + self._LOGIN_URL, 
compat_urllib_parse.urlencode(login_form)) + request.add_header( + 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') + login_results = compat_urllib_request.urlopen(request).info() + + sessid = re.match(r'PHPSESSID=(.*);', + login_results['Set-Cookie']).group(1) + request = compat_urllib_request.Request( + url, compat_urllib_parse.urlencode(login_form), + {'Cookie': 'PHPSESSID=' + sessid + ';'}) + webpage = compat_urllib_request.urlopen(request).read() + + video_id = re.match(self._VALID_URL, url).group(1) + + url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) + + if url_info is None: + self._downloader.report_warning( + u'Unable to log in: bad username/password') + return + + return {'id': video_id, + 'title' : re.search( + r'property="og:title" content="(.*?)" />' + , webpage).group(1), + 'description': re.search( + r'name="description" content="(.*?)" />', webpage).group(1), + 'ext' : 'mp4', + 'url' : url_info.group(1) + '/' + url_info.group(2), + 'thumbnail': re.search( + r'property="og:image" content="(.*?)" />', webpage + ).group(1) + } From cc6943e86aef74bef767be7f4027ab6122c95d55 Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Wed, 18 Sep 2013 00:07:04 +0200 Subject: [PATCH 003/264] Improvements --- youtube_dl/extractor/websurg.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 953bc9831..efc8029af 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -23,7 +23,7 @@ class WeBSurgIE(InfoExtractor): _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' - def _real_extract(self, url): + def _real_initialize(self): login_form = { 'username': self._downloader.params['username'], @@ -35,14 +35,13 @@ class WeBSurgIE(InfoExtractor): self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) request.add_header( 'Content-Type', 
'application/x-www-form-urlencoded;charset=utf-8') - login_results = compat_urllib_request.urlopen(request).info() + compat_urllib_request.urlopen(request).info() - sessid = re.match(r'PHPSESSID=(.*);', - login_results['Set-Cookie']).group(1) - request = compat_urllib_request.Request( - url, compat_urllib_parse.urlencode(login_form), - {'Cookie': 'PHPSESSID=' + sessid + ';'}) - webpage = compat_urllib_request.urlopen(request).read() + def _real_extract(self, url): + + request = compat_urllib_request.Request(url) + webpage = unicode( + compat_urllib_request.urlopen(request).read(), 'utf-8') video_id = re.match(self._VALID_URL, url).group(1) @@ -52,16 +51,10 @@ class WeBSurgIE(InfoExtractor): self._downloader.report_warning( u'Unable to log in: bad username/password') return - return {'id': video_id, - 'title' : re.search( - r'property="og:title" content="(.*?)" />' - , webpage).group(1), - 'description': re.search( - r'name="description" content="(.*?)" />', webpage).group(1), + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), 'ext' : 'mp4', 'url' : url_info.group(1) + '/' + url_info.group(2), - 'thumbnail': re.search( - r'property="og:image" content="(.*?)" />', webpage - ).group(1) + 'thumbnail': self._og_search_thumbnail(webpage) } From 0b7c2485b66d53ad14bc331e867927b370599e43 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 28 Sep 2013 15:43:34 +0200 Subject: [PATCH 004/264] [zdf] Add support for hash URLs and simplify (#1518) --- youtube_dl/extractor/zdf.py | 76 +++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 418509cb9..faed7ff7f 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -2,16 +2,14 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, - unescapeHTML, ) + class ZDFIE(InfoExtractor): - _VALID_URL = 
r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P[^/\?]+)(?:\?.*)?' - _TITLE = r'(?P.*)</h1>' + _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?' _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>' - _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' - _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -19,6 +17,9 @@ class ZDFIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('video_id') + if mobj.group('hash'): + url = url.replace(u'#', u'', 1) + html = self._download_webpage(url, video_id) streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] if streams is None: @@ -27,39 +28,48 @@ class ZDFIE(InfoExtractor): # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url # choose first/default media type and highest quality for now - for s in streams: #find 300 - dsl1000mbit - if s['quality'] == '300' and s['media_type'] == 'wstreaming': - stream_=s - break - for s in streams: #find veryhigh - dsl2000mbit - if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working - stream_=s - break - if stream_ is None: + def stream_pref(s): + TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming'] + try: + type_pref = TYPE_ORDER.index(s['media_type']) + except ValueError: + type_pref = 999 + + QUALITY_ORDER = ['veryhigh', '300'] + try: + quality_pref = QUALITY_ORDER.index(s['quality']) + except ValueError: + quality_pref = 999 + + return (type_pref, quality_pref) + + sorted_streams = sorted(streams, key=stream_pref) + if not sorted_streams: raise ExtractorError(u'No stream found.') + stream = sorted_streams[0] - media_link = self._download_webpage(stream_['video_url'], video_id,'Get 
stream URL') + media_link = self._download_webpage( + stream['video_url'], + video_id, + u'Get stream URL') - self.report_extraction(video_id) - mobj = re.search(self._TITLE, html) + MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' + RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' + + mobj = re.search(self._MEDIA_STREAM, media_link) if mobj is None: - raise ExtractorError(u'Cannot extract title') - title = unescapeHTML(mobj.group('title')) - - mobj = re.search(self._MMS_STREAM, media_link) - if mobj is None: - mobj = re.search(self._RTSP_STREAM, media_link) + mobj = re.search(RTSP_STREAM, media_link) if mobj is None: raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL') - mms_url = mobj.group('video_url') + video_url = mobj.group('video_url') - mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url) - if mobj is None: - raise ExtractorError(u'Cannot extract extention') - ext = mobj.group('ext') + title = self._html_search_regex( + r'<h1(?: class="beitragHeadline")?>(.*?)</h1>', + html, u'title') - return [{'id': video_id, - 'url': mms_url, - 'title': title, - 'ext': ext - }] + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': determine_ext(video_url) + } From 9c15e9de849641143e7654f2656c68e066fe9e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 28 Sep 2013 21:19:52 +0200 Subject: [PATCH 005/264] [yahoo] Fix video extraction (fixes #1521) There's no need to use two different methods. Now we can also download videos over http if possible. Also run the test for rtmp videos, but skip the download. 
--- youtube_dl/extractor/yahoo.py | 132 +++++++++++++++++----------------- 1 file changed, 65 insertions(+), 67 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 32d5b9477..39126e631 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -1,4 +1,3 @@ -import datetime import itertools import json import re @@ -6,86 +5,85 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( compat_urllib_parse, - - ExtractorError, + compat_urlparse, + determine_ext, + clean_html, ) + class YahooIE(InfoExtractor): IE_DESC = u'Yahoo screen' _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' - _TEST = { - u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - u'file': u'214727115.flv', - u'md5': u'2e717f169c1be93d84d3794a00d4a325', - u'info_dict': { - u"title": u"Julian Smith & Travis Legg Watch Julian Smith" + _TESTS = [ + { + u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', + u'file': u'214727115.mp4', + u'info_dict': { + u'title': u'Julian Smith & Travis Legg Watch Julian Smith', + u'description': u'Julian and Travis watch Julian Smith', + }, }, - u'skip': u'Requires rtmpdump' - } + { + u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', + u'file': u'103000935.flv', + u'info_dict': { + u'title': u'The Cougar Lies with Spanish Moss', + u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? 
And if your waffles have nothing to hide, what are they so worried about?', + }, + u'params': { + # Requires rtmpdump + u'skip_download': True, + }, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage) - if m_id is None: - # TODO: Check which url parameters are required - info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id - webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage') - info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]>.* - .*?)\]\]>.* - .*?)\ .*\]\]>.* - Date: Sun, 29 Sep 2013 12:44:02 +0200 Subject: [PATCH 006/264] [dailymotion] Disable the family filter in the playlists (fixes #1524) --- youtube_dl/extractor/dailymotion.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 64b89aae8..3f012aedc 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -14,8 +14,15 @@ from ..utils import ( ExtractorError, ) +class DailymotionBaseInfoExtractor(InfoExtractor): + @staticmethod + def _build_request(url): + """Build a request with the family filter disabled""" + request = compat_urllib_request.Request(url) + request.add_header('Cookie', 'family_filter=off') + return request -class DailymotionIE(SubtitlesInfoExtractor): +class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' @@ -40,8 +47,7 @@ class DailymotionIE(SubtitlesInfoExtractor): url = 
'http://www.dailymotion.com/video/%s' % video_id # Retrieve video webpage to extract further information - request = compat_urllib_request.Request(url) - request.add_header('Cookie', 'family_filter=off') + request = self._build_request(url) webpage = self._download_webpage(request, video_id) # Extract URL, uploader and title from webpage @@ -113,7 +119,7 @@ class DailymotionIE(SubtitlesInfoExtractor): return {} -class DailymotionPlaylistIE(InfoExtractor): +class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): IE_NAME = u'dailymotion:playlist' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' _MORE_PAGES_INDICATOR = r'' @@ -122,7 +128,8 @@ class DailymotionPlaylistIE(InfoExtractor): def _extract_entries(self, id): video_ids = [] for pagenum in itertools.count(1): - webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum), + request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum)) + webpage = self._download_webpage(request, id, u'Downloading page %s' % pagenum) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) From 46353f6783b9e468c9271c864f0711c85d3cea33 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 29 Sep 2013 11:17:38 +0200 Subject: [PATCH 007/264] [update] Look for .exe extension on Windows (Fixes #745) --- youtube_dl/__init__.py | 2 +- youtube_dl/update.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3851fc0a6..28a7bdd92 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -658,7 +658,7 @@ def _real_main(argv=None): # Update version if opts.update_self: - update_self(ydl.to_screen, opts.verbose, sys.argv[0]) + update_self(ydl.to_screen, opts.verbose) # Maybe do nothing if len(all_urls) < 1: diff --git a/youtube_dl/update.py b/youtube_dl/update.py index ccab6f27f..669b59a68 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -1,6 +1,7 @@ import json 
import traceback import hashlib +import sys from zipimport import zipimporter from .utils import * @@ -34,7 +35,7 @@ def rsa_verify(message, signature, key): if signature != sha256(message).digest(): return False return True -def update_self(to_screen, verbose, filename): +def update_self(to_screen, verbose): """Update the program file with the latest version from the repository""" UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" @@ -42,7 +43,6 @@ def update_self(to_screen, verbose, filename): JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) - if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"): to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. 
Please use that to update.') return @@ -80,6 +80,12 @@ def update_self(to_screen, verbose, filename): print_notes(to_screen, versions_info['versions']) + filename = sys.argv[0] + # Py2EXE: Filename could be different + if hasattr(sys, "frozen") and not os.path.isfile(filename): + if os.path.isfile(filename + u'.exe'): + filename += u'.exe' + if not os.access(filename, os.W_OK): to_screen(u'ERROR: no write permissions on %s' % filename) return From d27903703673e565a3a1e8dd418d1347ef331b3e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 29 Sep 2013 11:26:01 +0200 Subject: [PATCH 008/264] [update] Prevent cmd window popup on Windows (Fixes #1478) --- youtube_dl/update.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 669b59a68..0689a4891 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -1,6 +1,8 @@ +import io import json import traceback import hashlib +import subprocess import sys from zipimport import zipimporter @@ -75,8 +77,9 @@ def update_self(to_screen, verbose): to_screen(u'ERROR: the versions file signature is invalid. Aborting.') return - to_screen(u'Updating to version ' + versions_info['latest'] + '...') - version = versions_info['versions'][versions_info['latest']] + version_id = versions_info['latest'] + to_screen(u'Updating to version ' + version_id + '...') + version = versions_info['versions'][version_id] print_notes(to_screen, versions_info['versions']) @@ -122,16 +125,18 @@ def update_self(to_screen, verbose): try: bat = os.path.join(directory, 'youtube-dl-updater.bat') - b = open(bat, 'w') - b.write(""" -echo Updating youtube-dl... + with io.open(bat, 'w') as batfile: + batfile.write(u""" +@echo off +echo Waiting for file handle to be closed ... ping 127.0.0.1 -n 5 -w 1000 > NUL -move /Y "%s.new" "%s" -del "%s" - \n""" %(exe, exe, bat)) - b.close() +move /Y "%s.new" "%s" > NUL +echo Updated youtube-dl to version %s. 
+start /b "" cmd /c del "%%~f0"&exit /b" + \n""" % (exe, exe, version_id)) - os.startfile(bat) + subprocess.Popen([bat]) # Continues to run in the background + return # Do not show premature success messages except (IOError, OSError) as err: if verbose: to_screen(compat_str(traceback.format_exc())) to_screen(u'ERROR: unable to overwrite current version') From 138a5454b5f2af27b0b31764a8125cad23fd3429 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 29 Sep 2013 14:38:37 +0200 Subject: [PATCH 009/264] release 2013.09.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8e6356dab..e3e5d5538 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.24.2' +__version__ = '2013.09.29' From 843530568f326294d714b5b9f11bbf6176d73ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 29 Sep 2013 20:49:58 +0200 Subject: [PATCH 010/264] [appletrailers] Rework extraction (fixes #1387) The extraction was broken: * The includes page contains img elements that need to be fixed. * Use the 'itunes.inc' page, it contains a json dictionary for each trailer with information. * Get the formats from 'includes/settings{trailer_name}.json' * Use urljoin to allow urls with a fragment identifier to work Removed the thumbnail urls from the tests, they are different now. 
--- youtube_dl/extractor/appletrailers.py | 112 ++++++++++---------------- 1 file changed, 42 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 8b191c196..b86c4b909 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -1,8 +1,10 @@ import re import xml.etree.ElementTree +import json from .common import InfoExtractor from ..utils import ( + compat_urlparse, determine_ext, ) @@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor): u"playlist": [ { u"file": u"manofsteel-trailer4.mov", - u"md5": u"11874af099d480cc09e103b189805d5f", + u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", u"info_dict": { u"duration": 111, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg", u"title": u"Trailer 4", u"upload_date": u"20130523", u"uploader_id": u"wb", @@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor): }, { u"file": u"manofsteel-trailer3.mov", - u"md5": u"07a0a262aae5afe68120eed61137ab34", + u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", u"info_dict": { u"duration": 182, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg", u"title": u"Trailer 3", u"upload_date": u"20130417", u"uploader_id": u"wb", @@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor): }, { u"file": u"manofsteel-trailer.mov", - u"md5": u"e401fde0813008e3307e54b6f384cff1", + u"md5": u"d0f1e1150989b9924679b441f3404d48", u"info_dict": { u"duration": 148, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg", u"title": u"Trailer", u"upload_date": u"20121212", u"uploader_id": u"wb", @@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor): }, { u"file": u"manofsteel-teaser.mov", - u"md5": u"76b392f2ae9e7c98b22913c10a639c97", + u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", u"info_dict": { u"duration": 93, - u"thumbnail": 
u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg", u"title": u"Teaser", u"upload_date": u"20120721", u"uploader_id": u"wb", @@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor): ] } + _JSON_RE = r'iTunes.playURL\((.*?)\);' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) movie = mobj.group('movie') uploader_id = mobj.group('company') - playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc' + playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') playlist_snippet = self._download_webpage(playlist_url, movie) - playlist_cleaned = re.sub(r'(?s)', u'', playlist_snippet) + playlist_cleaned = re.sub(r'(?s).*?', u'', playlist_snippet) + playlist_cleaned = re.sub(r'', r'', playlist_cleaned) + # The ' in the onClick attributes are not escaped, it couldn't be parsed + # with xml.etree.ElementTree.fromstring + # like: http://trailers.apple.com/trailers/wb/gravity/ + def _clean_json(m): + return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned) playlist_html = u'' + playlist_cleaned + u'' - size_cache = {} - doc = xml.etree.ElementTree.fromstring(playlist_html) playlist = [] for li in doc.findall('./div/ul/li'): - title = li.find('.//h3').text + on_click = li.find('.//a').attrib['onClick'] + trailer_info_json = self._search_regex(self._JSON_RE, + on_click, u'trailer info') + trailer_info = json.loads(trailer_info_json) + title = trailer_info['title'] video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() thumbnail = li.find('.//img').attrib['src'] + upload_date = trailer_info['posted'].replace('-', '') - date_el = li.find('.//p') - upload_date = None - m = re.search(r':\s?(?P[0-9]{2})/(?P[0-9]{2})/(?P[0-9]{2})', date_el.text) - if m: - upload_date = u'20' + m.group('year') + m.group('month') + m.group('day') - runtime_el = date_el.find('./br') - m = re.search(r':\s?(?P[0-9]+):(?P[0-9]{1,2})', 
runtime_el.tail) + runtime = trailer_info['runtime'] + m = re.search(r'(?P[0-9]+):(?P[0-9]{1,2})', runtime) duration = None if m: duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) + first_url = trailer_info['url'] + trailer_id = first_url.split('/')[-1].rpartition('_')[0] + settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') + settings = json.loads(settings_json) + formats = [] - for formats_el in li.findall('.//a'): - if formats_el.attrib['class'] != 'OverlayPanel': - continue - target = formats_el.attrib['target'] - - format_code = formats_el.text - if 'Automatic' in format_code: - continue - - size_q = formats_el.attrib['href'] - size_id = size_q.rpartition('#videos-')[2] - if size_id not in size_cache: - size_url = url + size_q - sizepage_html = self._download_webpage( - size_url, movie, - note=u'Downloading size info %s' % size_id, - errnote=u'Error while downloading size info %s' % size_id, - ) - _doc = xml.etree.ElementTree.fromstring(sizepage_html) - size_cache[size_id] = _doc - - sizepage_doc = size_cache[size_id] - links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a') - for vid_a in links: - href = vid_a.get('href') - if not href.endswith(target): - continue - detail_q = href.partition('#')[0] - detail_url = url + '/' + detail_q - - m = re.match(r'includes/(?P[^/]+)/', detail_q) - detail_id = m.group('detail_id') - - detail_html = self._download_webpage( - detail_url, movie, - note=u'Downloading detail %s %s' % (detail_id, size_id), - errnote=u'Error while downloading detail %s %s' % (detail_id, size_id) - ) - detail_doc = xml.etree.ElementTree.fromstring(detail_html) - movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a') - assert movie_link_el.get('class') == 'movieLink' - movie_link = 
movie_link_el.get('href').partition('?')[0].replace('_', '_h') - ext = determine_ext(movie_link) - assert ext == 'mov' - - formats.append({ - 'format': format_code, - 'ext': ext, - 'url': movie_link, - }) + for format in settings['metadata']['sizes']: + # The src is a file pointing to the real video file + format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + formats.append({ + 'url': format_url, + 'ext': determine_ext(format_url), + 'format': format['type'], + 'width': format['width'], + 'height': int(format['height']), + }) + formats = sorted(formats, key=lambda f: (f['height'], f['width'])) info = { '_type': 'video', From bb4aa62cf7ad3d5aae4edf56ab8954c80a2d8956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 29 Sep 2013 20:59:19 +0200 Subject: [PATCH 011/264] [appletrailers] The request for the settings must have the trailer name in lower case (fixes #1329) --- youtube_dl/extractor/appletrailers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b86c4b909..6d6237f8a 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -95,7 +95,7 @@ class AppleTrailersIE(InfoExtractor): duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) first_url = trailer_info['url'] - trailer_id = first_url.split('/')[-1].rpartition('_')[0] + trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') settings = json.loads(settings_json) From 722076a123c60ed6d5a978c4bc2609f46c8e3ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 29 Sep 2013 23:07:26 +0200 Subject: [PATCH 012/264] [rtlnow] Replace one of the tests The video is no longer 
available. --- youtube_dl/extractor/rtlnow.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index e6fa0475e..32541077f 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -63,13 +63,13 @@ class RTLnowIE(InfoExtractor): }, }, { - u'url': u'http://www.rtlnitronow.de/recht-ordnung/fahrradpolizei-koeln-fischereiaufsicht-ruegen.php?film_id=124311&player=1&season=1', - u'file': u'124311.flv', + u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1', + u'file': u'127367.flv', u'info_dict': { - u'upload_date': u'20130830', - u'title': u'Recht & Ordnung - Fahrradpolizei Köln & Fischereiaufsicht Rügen', - u'description': u'Fahrradpolizei Köln & Fischereiaufsicht Rügen', - u'thumbnail': u'http://autoimg.static-fra.de/nitronow/338273/1500x1500/image2.jpg' + u'upload_date': u'20130926', + u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...', + u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin', + u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg', }, u'params': { u'skip_download': True, From 47192f92d801f38c0a608ca9c6cecc682ab2ecc6 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 30 Sep 2013 16:26:25 -0400 Subject: [PATCH 013/264] implement --no-playlist to only download current video - closes #755 --- README.md | 1 + youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 2 ++ youtube_dl/extractor/youtube.py | 13 ++++++++++++- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fc8070c37..66a483b76 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ which means you can modify it, redistribute it or use it however you like. 
--date DATE download only videos uploaded in this date --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date + --no-playlist download only the currently playing video ## Download Options: -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 44a272e7e..2503fd09b 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -83,6 +83,7 @@ class YoutubeDL(object): skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. + noplaylist: Download single video instead of a playlist if in doubt. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 28a7bdd92..c9e75eab4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -187,6 +187,7 @@ def parseOpts(overrideArguments=None): selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) + selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) authentication.add_option('-u', '--username', @@ -599,6 +600,7 @@ def _real_main(argv=None): 'progress_with_newline': opts.progress_with_newline, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, + 'noplaylist': opts.noplaylist, 'logtostderr': opts.outtmpl == '-', 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, diff --git a/youtube_dl/extractor/youtube.py 
b/youtube_dl/extractor/youtube.py index 53f13b516..c6876c69f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -13,6 +13,7 @@ import struct import traceback import xml.etree.ElementTree import zlib +import urlparse from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor @@ -1523,9 +1524,19 @@ class YoutubePlaylistIE(InfoExtractor): mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group(1) or mobj.group(2) + + # Check if it's a video-specific URL + query_dict = urlparse.parse_qs(urlparse.urlparse(url).query) + if 'v' in query_dict: + video_id = query_dict['v'][0] + if self._downloader.params.get('noplaylist'): + self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) + return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') + else: + self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) # Download playlist videos from API - playlist_id = mobj.group(1) or mobj.group(2) videos = [] for page_num in itertools.count(1): From d4d9920a2630ef6c44cffa1b923e41291b44b5f0 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 30 Sep 2013 18:01:17 -0400 Subject: [PATCH 014/264] add test for --no-playlist --- test/test_youtube_lists.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index dd9e292b0..f28fe78e0 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -27,6 +27,13 @@ class TestYoutubeLists(unittest.TestCase): ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) + def test_youtube_playlist_noplaylist(self): + dl = FakeYDL() + dl.params['noplaylist'] = True + ie = YoutubePlaylistIE(dl) + result = 
ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + self.assertEqual(result['_type'], 'url') + def test_issue_673(self): dl = FakeYDL() ie = YoutubePlaylistIE(dl) From fa556755930db77c62016a468d870e76608db012 Mon Sep 17 00:00:00 2001 From: Mark Oteiza Date: Sun, 29 Sep 2013 22:50:46 -0400 Subject: [PATCH 015/264] Support XDG base directory specification --- youtube_dl/__init__.py | 16 +++++++++++++--- youtube_dl/extractor/youtube.py | 8 ++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 28a7bdd92..95f75942a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -113,6 +113,12 @@ def parseOpts(overrideArguments=None): pass return opts + xdg_cache_home = os.environ.get('XDG_CACHE_HOME') + if xdg_cache_home: + userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl') + else: + userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl') + max_width = 80 max_help_position = 80 @@ -168,7 +174,7 @@ def parseOpts(overrideArguments=None): general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( - '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', + '--cache-dir', dest='cachedir', default=userCacheDir, help='Location in the filesystem where youtube-dl can store downloaded information permanently. 
%default by default') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', @@ -369,9 +375,13 @@ def parseOpts(overrideArguments=None): else: xdg_config_home = os.environ.get('XDG_CONFIG_HOME') if xdg_config_home: - userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') + userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') + if not os.path.isfile(userConfFile): + userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf') else: - userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') + userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') + if not os.path.isfile(userConfFile): + userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') systemConf = _readOptions('/etc/youtube-dl.conf') userConf = _readOptions(userConfFile) commandLineConf = sys.argv[1:] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 53f13b516..23e384ba2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -420,8 +420,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Read from filesystem cache func_id = '%s_%s_%d' % (player_type, player_id, slen) assert os.path.basename(func_id) == func_id - cache_dir = self._downloader.params.get('cachedir', - u'~/.youtube-dl/cache') + xdg_cache_home = os.environ.get('XDG_CACHE_HOME') + if xdg_cache_home: + userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl') + else: + userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl') + cache_dir = self._downloader.params.get('cachedir', userCacheDir) cache_enabled = cache_dir is not None if cache_enabled: From 4c62a16f4f4994c63e80eafcaeb5e6ff90305c38 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Tue, 1 Oct 2013 06:55:30 +0200 Subject: [PATCH 016/264] [RTLnowIE] Add support for http://n-tvnow.de --- youtube_dl/extractor/rtlnow.py | 16 ++++++++++++++-- 1 file 
changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 32541077f..fe66cc6e5 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -8,8 +8,8 @@ from ..utils import ( ) class RTLnowIE(InfoExtractor): - """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW and VOX NOW""" - _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" + _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/|(?:www\.)?n-tvnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', u'file': u'90419.flv', @@ -74,6 +74,18 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, + }, + { + u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', + u'file': u'124903.flv', + u'info_dict': { + u'upload_date': u'20130101', + u'title': u'Top Gear vom 01.01.2013', + u'description': u'Episode 1', + }, + u'params': { + u'skip_download': True, + }, }] def _real_extract(self,url): From adfeafe9e19d3240bb07ad6ca97f01aed86c0615 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Tue, 1 Oct 2013 07:22:49 +0200 Subject: [PATCH 017/264] [RTLnowIE] Allow video description without upload date Some videos (feature films) have no upload date. 
--- youtube_dl/extractor/rtlnow.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 32541077f..a156666f9 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -98,14 +98,17 @@ class RTLnowIE(InfoExtractor): webpage, u'playerdata_url') playerdata = self._download_webpage(playerdata_url, video_id) - mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]>', playerdata) + mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]>', playerdata) if mobj: video_description = mobj.group(u'description') if mobj.group('upload_date_Y'): video_upload_date = mobj.group('upload_date_Y') - else: + elif mobj.group('upload_date_y'): video_upload_date = u'20' + mobj.group('upload_date_y') - video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') + else: + video_upload_date = None + if video_upload_date: + video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') else: video_description = None video_upload_date = None From f10503db67fb01f85755159ef2ad4d3bc3a58b7a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Oct 2013 11:39:11 +0200 Subject: [PATCH 018/264] Handle videos without url_encoded_fmt_stream_map (Fixes #1535) --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 53f13b516..f3b9e3ab1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1390,6 +1390,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): args = info['args'] # 
Easy way to know if the 's' value is in url_encoded_fmt_stream_map # this signatures are encrypted + if 'url_encoded_fmt_stream_map': + raise ValueError(u'No stream_map present') # caught below m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' % video_id) From 05751eb0479b988a89d650befea239a32bddbfd9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Oct 2013 11:43:54 +0200 Subject: [PATCH 019/264] release 2013.10.01 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e3e5d5538..1909f4a7f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.29' +__version__ = '2013.10.01' From 44d466559e9d1e762456a2dce4fc6cb0775f105e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Oct 2013 14:44:09 +0200 Subject: [PATCH 020/264] Properly handle stream map not being present --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3b9e3ab1..890667340 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1390,7 +1390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): args = info['args'] # Easy way to know if the 's' value is in url_encoded_fmt_stream_map # this signatures are encrypted - if 'url_encoded_fmt_stream_map': + if 'url_encoded_fmt_stream_map' not in args: raise ValueError(u'No stream_map present') # caught below m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) if m_s is not None: From 52f15da2caf58df4ddd8f995ca2fa5cfebd0cdfb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Oct 2013 14:44:26 +0200 Subject: [PATCH 021/264] release 2013.10.01.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/youtube_dl/version.py b/youtube_dl/version.py index 1909f4a7f..8e9ca6126 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.01' +__version__ = '2013.10.01.1' From c54283824c72434c31e0dcce1d09fc3259dbcdc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 1 Oct 2013 15:05:41 +0200 Subject: [PATCH 022/264] [dailymotion] Detect vevo videos (fixes #1532) All videos from the Vevo user, just embed videos from vevo.com --- youtube_dl/extractor/dailymotion.py | 43 +++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 3f012aedc..259806f38 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -27,15 +27,31 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' IE_NAME = u'dailymotion' - _TEST = { - u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', - u'file': u'x33vw9.mp4', - u'md5': u'392c4b85a60a90dc4792da41ce3144eb', - u'info_dict': { - u"uploader": u"Amphora Alex and Van .", - u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" - } - } + _TESTS = [ + { + u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', + u'file': u'x33vw9.mp4', + u'md5': u'392c4b85a60a90dc4792da41ce3144eb', + u'info_dict': { + u"uploader": u"Amphora Alex and Van .", + u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } + }, + # Vevo video + { + u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', + u'file': u'USUV71301934.mp4', + u'info_dict': { + u'title': u'Roar (Official)', + u'uploader': u'Katy Perry', + u'upload_date': u'20130905', + }, + u'params': { + u'skip_download': True, + }, + u'skip': 
u'VEVO is only available in some countries', + }, + ] def _real_extract(self, url): # Extract id and simplified title from URL @@ -53,6 +69,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) + # It may just embed a vevo video: + m_vevo = re.search( + r'[\w]*)', + webpage) + if m_vevo is not None: + vevo_id = m_vevo.group('id') + self.to_screen(u'Vevo video detected: %s' % vevo_id) + return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') + video_uploader = self._search_regex([r'(?im)[^<]+?]+?>([^<]+?)', # Looking for official user r'<(?:span|a) .*?rel="author".*?>([^<]+?) Date: Tue, 1 Oct 2013 11:58:13 -0400 Subject: [PATCH 023/264] [youtube] correct --no-playlist for python3 --- test/test_youtube_lists.py | 1 + youtube_dl/extractor/youtube.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index f28fe78e0..53e65816d 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -33,6 +33,7 @@ class TestYoutubeLists(unittest.TestCase): ie = YoutubePlaylistIE(dl) result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') + self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg') def test_issue_673(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c6876c69f..5d932f8a4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -13,7 +13,6 @@ import struct import traceback import xml.etree.ElementTree import zlib -import urlparse from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor @@ -24,6 +23,7 @@ from ..utils import ( compat_urllib_error, compat_urllib_parse, compat_urllib_request, + compat_urlparse, compat_str, clean_html, @@ -1527,7 
+1527,7 @@ class YoutubePlaylistIE(InfoExtractor): playlist_id = mobj.group(1) or mobj.group(2) # Check if it's a video-specific URL - query_dict = urlparse.parse_qs(urlparse.urlparse(url).query) + query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) if 'v' in query_dict: video_id = query_dict['v'][0] if self._downloader.params.get('noplaylist'): From a8c6b2415535054862235a7286ac03db474c95a0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 2 Oct 2013 07:25:35 +0200 Subject: [PATCH 024/264] [youtube] Support videos without a title (Fixes #1391, Closes #1542) --- youtube_dl/extractor/youtube.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6580d8bf8..89e3d0f74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1334,9 +1334,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._downloader.report_warning(u'unable to extract uploader nickname') # title - if 'title' not in video_info: - raise ExtractorError(u'Unable to extract video title') - video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) + if 'title' in video_info: + video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) + else: + self._downloader.report_warning(u'Unable to extract video title') + video_title = u'_' # thumbnail image # We try first to get a high quality image: From ca40186c75d93250f9115328838703b021f21c8a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 2 Oct 2013 08:19:56 +0200 Subject: [PATCH 025/264] [youtube] Fix static 82 signature (Closes #1539) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 89e3d0f74..4389924c5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1087,7 +1087,7 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 83: return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] elif len(s) == 82: - return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] + return s[12] + s[79:12:-1] + s[80] + s[11::-1] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 80: From c38b1e776dcc525b6fbe0660c484f1d50d2e0165 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 2 Oct 2013 08:41:03 +0200 Subject: [PATCH 026/264] [youtube] Simplify cache_dir code (#1529) --- youtube_dl/__init__.py | 8 +------- youtube_dl/extractor/youtube.py | 8 ++------ youtube_dl/utils.py | 6 ++++++ 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cc771ee89..62b557986 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -113,12 +113,6 @@ def parseOpts(overrideArguments=None): pass return opts - xdg_cache_home = os.environ.get('XDG_CACHE_HOME') - if xdg_cache_home: - userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl') - else: - userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl') - max_width = 80 max_help_position = 80 @@ -174,7 +168,7 @@ def parseOpts(overrideArguments=None): general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( - '--cache-dir', dest='cachedir', default=userCacheDir, + '--cache-dir', dest='cachedir', default=get_cachedir(), help='Location in the filesystem where youtube-dl can store downloaded information permanently. 
%default by default') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3d279210..9ca29a043 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..utils import ( compat_str, clean_html, + get_cachedir, get_element_by_id, ExtractorError, unescapeHTML, @@ -421,12 +422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Read from filesystem cache func_id = '%s_%s_%d' % (player_type, player_id, slen) assert os.path.basename(func_id) == func_id - xdg_cache_home = os.environ.get('XDG_CACHE_HOME') - if xdg_cache_home: - userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl') - else: - userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl') - cache_dir = self._downloader.params.get('cachedir', userCacheDir) + cache_dir = get_cachedir(self._downloader.params) cache_enabled = cache_dir is not None if cache_enabled: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 201ed255d..f5f9cde99 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -824,3 +824,9 @@ def intlist_to_bytes(xs): return ''.join([chr(x) for x in xs]) else: return bytes(xs) + + +def get_cachedir(params={}): + cache_root = os.environ.get('XDG_CACHE_HOME', + os.path.expanduser('~/.cache')) + return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) From be8fe32c92affa812c5522c463709ba5376a1ea6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 2 Oct 2013 14:37:19 +0200 Subject: [PATCH 027/264] Fix help of --cachedir --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 62b557986..03df835f2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -169,7 +169,7 @@ def parseOpts(overrideArguments=None): general.add_option('--no-check-certificate', 
action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), - help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default') + help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') From ee6adb166c5f605b826c3bb2e05673fa193c0964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 2 Oct 2013 20:59:34 +0200 Subject: [PATCH 028/264] [ign] Support more urls and detect multiple videos in articles (fixes #1543) --- youtube_dl/extractor/ign.py | 54 ++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index b1c84278a..c52146f7d 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -13,7 +13,7 @@ class IGNIE(InfoExtractor): Some videos of it.ign.com are also supported """ - _VALID_URL = r'https?://.+?\.ign\.com/(?Pvideos|show_videos|articles)(/.+)?/(?P.+)' + _VALID_URL = r'https?://.+?\.ign\.com/(?Pvideos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P.+)' IE_NAME = u'ign.com' _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' @@ -21,15 +21,39 @@ class IGNIE(InfoExtractor): r'id="my_show_video">.*?

(.*?)

', ] - _TEST = { - u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', - u'file': u'8f862beef863986b2785559b9e1aa599.mp4', - u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', - u'info_dict': { - u'title': u'The Last of Us Review', - u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', - } - } + _TESTS = [ + { + u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', + u'file': u'8f862beef863986b2785559b9e1aa599.mp4', + u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', + u'info_dict': { + u'title': u'The Last of Us Review', + u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', + } + }, + { + u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', + u'playlist': [ + { + u'file': u'5ebbd138523268b93c9141af17bec937.mp4', + u'info_dict': { + u'title': u'GTA 5 Video Review', + u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', + }, + }, + { + u'file': u'638672ee848ae4ff108df2a296418ee2.mp4', + u'info_dict': { + u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion', + u'description': u'The twisted beauty of GTA 5 in stunning slow motion.', + }, + }, + ], + u'params': { + u'skip_download': True, + }, + }, + ] def _find_video_id(self, webpage): res_id = [r'data-video-id="(.+?)"', @@ -46,6 +70,13 @@ class IGNIE(InfoExtractor): if page_type == 'articles': video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url') return self.url_result(video_url, ie='IGN') + elif page_type != 'video': + multiple_urls = re.findall( + ' Date: Fri, 4 Oct 2013 00:31:10 +0200 Subject: [PATCH 029/264] [france2] Add support for URLs without video IDs (Fixes #1547) --- youtube_dl/extractor/francetv.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index b1530e549..461dac8ef 100644 --- a/youtube_dl/extractor/francetv.py +++ 
b/youtube_dl/extractor/francetv.py @@ -70,7 +70,11 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): class France2IE(FranceTVBaseInfoExtractor): IE_NAME = u'france2.fr' - _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P\d+)' + _VALID_URL = r'''(?x)https?://www\.france2\.fr/ + (?: + emissions/.*?/videos/(?P\d+) + | emission/(?P[^/?]+) + )''' _TEST = { u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', @@ -86,7 +90,15 @@ class France2IE(FranceTVBaseInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + if mobj.group('key'): + webpage = self._download_webpage(url, mobj.group('key')) + video_id = self._html_search_regex( + r'''(?x)\s* + ''', + webpage, u'video ID') + else: + video_id = mobj.group('id') return self._extract_video(video_id) From 9ab1018b1adfdbf717872dd3b8cbeb99e0825763 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 00:38:19 +0200 Subject: [PATCH 030/264] release 2013.10.04 --- README.md | 5 +++-- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 66a483b76..14d62b189 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,9 @@ which means you can modify it, redistribute it or use it however you like. --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. --cache-dir None Location in the filesystem where youtube-dl can - store downloaded information permanently. - ~/.youtube-dl/cache by default + store downloaded information permanently. By + default $XDG_CACHE_HOME/youtube-dl or ~/.cache + /youtube-dl . 
--no-cache-dir Disable filesystem caching ## Video Selection: diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8e9ca6126..e773e82da 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.01.1' +__version__ = '2013.10.04' From c21315f273c7f2877dfbd43fe3b56d636091e1c0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 00:43:01 +0200 Subject: [PATCH 031/264] [youtube] new static 82 signature --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9ca29a043..39ff33290 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1087,7 +1087,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 83: return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] elif len(s) == 82: - return s[12] + s[79:12:-1] + s[80] + s[11::-1] + return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 80: From 5c1d63b73737bb23885ae6079e2004b5f084eb9c Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Fri, 4 Oct 2013 01:04:38 +0200 Subject: [PATCH 032/264] Changes suggested by @phihag --- youtube_dl/extractor/websurg.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index efc8029af..849334aa0 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -36,21 +36,21 @@ class WeBSurgIE(InfoExtractor): request.add_header( 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') compat_urllib_request.urlopen(request).info() + request = compat_urllib_request.Request(self._LOGIN_URL) + webpage = 
compat_urllib_request.urlopen(request).read() + + if webpage != 'OK': + self._downloader.report_error( + u'Unable to log in: bad username/password') def _real_extract(self, url): - - request = compat_urllib_request.Request(url) - webpage = unicode( - compat_urllib_request.urlopen(request).read(), 'utf-8') - video_id = re.match(self._VALID_URL, url).group(1) + request = compat_urllib_request.Request(url) + webpage = self._download_webpage(url, video_id) + url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) - if url_info is None: - self._downloader.report_warning( - u'Unable to log in: bad username/password') - return return {'id': video_id, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), From b039775057abf6005ceef2819a746c9f3b671cd3 Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Fri, 4 Oct 2013 01:07:24 +0200 Subject: [PATCH 033/264] Unused variable --- youtube_dl/extractor/websurg.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 849334aa0..96a1bb852 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -46,7 +46,6 @@ class WeBSurgIE(InfoExtractor): def _real_extract(self, url): video_id = re.match(self._VALID_URL, url).group(1) - request = compat_urllib_request.Request(url) webpage = self._download_webpage(url, video_id) url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) From 73b4fafd82256c66198b1670d1a6dccfaf5f782c Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Fri, 4 Oct 2013 01:12:42 +0200 Subject: [PATCH 034/264] Use self._download_webpage everywhere --- youtube_dl/extractor/websurg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 96a1bb852..7d335d444 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -36,8 +36,7 @@ class WeBSurgIE(InfoExtractor): 
request.add_header( 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') compat_urllib_request.urlopen(request).info() - request = compat_urllib_request.Request(self._LOGIN_URL) - webpage = compat_urllib_request.urlopen(request).read() + webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in') if webpage != 'OK': self._downloader.report_error( From 829493439aba4a5feae03729d4dbb3e2b45f0949 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 4 Oct 2013 07:47:40 +0200 Subject: [PATCH 035/264] [FlickrIE] Fix HTTPS url --- youtube_dl/extractor/flickr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 80d96baf7..e1d2f0526 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -9,7 +9,7 @@ from ..utils import ( class FlickrIE(InfoExtractor): """Information Extractor for Flickr videos""" - _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P[\w\-_@]+)/(?P\d+).*' + _VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P[\w\-_@]+)/(?P\d+).*' _TEST = { u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/', u'file': u'5645318632.mp4', From 0d8cb1cc14cde1b17385901ff97b8525bcc1cc5e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 10:32:34 +0200 Subject: [PATCH 036/264] [ted] Prepare #980 merge --- youtube_dl/extractor/ted.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 4c11f7a03..dfa1176a3 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -77,12 +77,20 @@ class TEDIE(InfoExtractor): thumbnail = self._search_regex(r'
[\s.]*[\s.]* Date: Fri, 4 Oct 2013 10:40:42 +0200 Subject: [PATCH 037/264] Document formats (for #980) --- youtube_dl/extractor/common.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 77726ee24..b577c9e6d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -52,8 +52,19 @@ class InfoExtractor(object): view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen + formats: A list of dictionaries for each format available, it must + be ordered from worst to best quality. Potential fields: + * url Mandatory. The URL of the video file + * ext Will be calculated from url if missing + * format A human-readable description of the format + ("mp4 container with h264/opus"). + Calculated from width and height if missing. + * format_id A short description of the format + ("mp4_h264_opus" or "19") + * width Width of the video, if known + * height Height of the video, if known - The fields should all be Unicode strings. + Unless mentioned otherwise, the fields should be Unicode strings. Subclasses of this one should re-define the _real_initialize() and _real_extract() methods and define a _VALID_URL regexp. From 2f5865cc6d08d3c1d49f3182335a4ae10cbd788a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:09:43 +0200 Subject: [PATCH 038/264] Clarify that url and ext are optional when formats is given (#980) --- youtube_dl/extractor/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b577c9e6d..69cdcdc1b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -35,6 +35,8 @@ class InfoExtractor(object): title: Video title, unescaped. ext: Video filename extension. 
+ Instead of url and ext, formats can also specified. + The following fields are optional: format: The video format, defaults to ext (used for --get-format) From f13d09332d320d514d508eaf8f2b889fa570816b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:10:04 +0200 Subject: [PATCH 039/264] [mtv] Prepare for #980 --- youtube_dl/extractor/mtv.py | 47 ++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8f956571d..001a576a8 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -54,23 +54,26 @@ class MTVIE(InfoExtractor): def _get_thumbnail_url(self, uri, itemdoc): return 'http://mtv.mtvnimages.com/uri/' + uri - def _extract_video_url(self, metadataXml): + def _extract_video_formats(self, metadataXml): if '/error_country_block.swf' in metadataXml: raise ExtractorError(u'This video is not available from your country.', expected=True) mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) renditions = mdoc.findall('.//rendition') - # For now, always pick the highest quality. 
- rendition = renditions[-1] - - try: - _,_,ext = rendition.attrib['type'].partition('/') - format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] - rtmp_video_url = rendition.find('./src').text - except KeyError: - raise ExtractorError('Invalid rendition field.') - video_url = self._transform_rtmp_url(rtmp_video_url) - return {'ext': ext, 'url': video_url, 'format': format} + formats = [] + for rendition in mdoc.findall('.//rendition'): + try: + _, _, ext = rendition.attrib['type'].partition('/') + rtmp_video_url = rendition.find('./src').text + formats.append({'ext': ext, + 'url': self._transform_rtmp_url(rtmp_video_url), + 'format_id': rendition.get('bitrate'), + 'width': int(rendition.get('width')), + 'height': int(rendition.get('height')), + }) + except (KeyError, TypeError): + raise ExtractorError('Invalid rendition field.') + return formats def _get_video_info(self, itemdoc): uri = itemdoc.find('guid').text @@ -81,19 +84,25 @@ class MTVIE(InfoExtractor): mediagen_url += '&acceptMethods=fms' mediagen_page = self._download_webpage(mediagen_url, video_id, u'Downloading video urls') - video_info = self._extract_video_url(mediagen_page) description_node = itemdoc.find('description') if description_node is not None: description = description_node.text else: description = None - video_info.update({'title': itemdoc.find('title').text, - 'id': video_id, - 'thumbnail': self._get_thumbnail_url(uri, itemdoc), - 'description': description, - }) - return video_info + + info = { + 'title': itemdoc.find('title').text, + 'formats': self._extract_video_formats(mediagen_page), + 'id': video_id, + 'thumbnail': self._get_thumbnail_url(uri, itemdoc), + 'description': description, + } + + # TODO: Remove when #980 has been merged + info.update(info['formats'][-1]) + + return info def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) From d93bdee9a67c0203efd439684b269d0e1a805827 Mon Sep 17 00:00:00 2001 
From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:14:10 +0200 Subject: [PATCH 040/264] [comedycentral] Prepare for generic video extraction (#980) --- youtube_dl/extractor/comedycentral.py | 63 ++++++++++++--------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index bf8d711ee..69b2beece 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor): '400': 'mp4', } _video_dimensions = { - '3500': '1280x720', - '2200': '960x540', - '1700': '768x432', - '1200': '640x360', - '750': '512x288', - '400': '384x216', + '3500': (1280, 720), + '2200': (960, 540), + '1700': (768, 432), + '1200': (640, 360), + '750': (512, 288), + '400': (384, 216), } @classmethod @@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor): """Receives a URL and returns True if suitable for this IE.""" return re.match(cls._VALID_URL, url, re.VERBOSE) is not None - def _print_formats(self, formats): - print('Available formats:') - for x in formats: - print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) - + @staticmethod + def _transform_rtmp_url(rtmp_video_url): + m = re.match(r'^rtmpe?://.*?/(?Pgsp.comedystor/.*)$', rtmp_video_url) + if not m: + raise ExtractorError(u'Cannot transform RTMP url') + base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' + return base + m.group('finalid') def _real_extract(self, url): mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor): self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found') continue - if self._downloader.params.get('listformats', None): - self._print_formats([i[0] for i in turls]) - return - - # For now, just pick the highest bitrate - 
format,rtmp_video_url = turls[-1] - - # Get the format arg from the arg stream - req_format = self._downloader.params.get('format', None) - - # Select format if we can find one - for f,v in turls: - if f == req_format: - format, rtmp_video_url = f, v - break - - m = re.match(r'^rtmpe?://.*?/(?Pgsp.comedystor/.*)$', rtmp_video_url) - if not m: - raise ExtractorError(u'Cannot transform RTMP url') - base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' - video_url = base + m.group('finalid') + formats = [] + for format, rtmp_video_url in turls: + w, h = self._video_dimensions.get(format, (None, None)) + formats.append({ + 'url': self._transform_rtmp_url(rtmp_video_url), + 'ext': self._video_extensions.get(format, 'mp4'), + 'format_id': format, + 'height': h, + 'width': w, + }) effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) info = { 'id': shortMediaId, - 'url': video_url, + 'formats': formats, 'uploader': showId, 'upload_date': officialDate, 'title': effTitle, - 'ext': 'mp4', - 'format': format, 'thumbnail': None, 'description': compat_str(officialTitle), } + + # TODO: Remove when #980 has been merged + info.update(info['formats'][-1]) + results.append(info) return results From 9e0f897f6bf763f2f0c4901616b6c4745700ddea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:30:01 +0200 Subject: [PATCH 041/264] [francetv] Use common format for ID of generation-quoi subextractor --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 461dac8ef..086cafca0 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -103,7 +103,7 @@ class France2IE(FranceTVBaseInfoExtractor): class GenerationQuoiIE(InfoExtractor): - IE_NAME = u'http://generation-quoi.france2.fr' + IE_NAME = u'france2.fr:generation-quoi' 
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P.*)(\?|$)' _TEST = { From 9f1f6d24378c2af8e76e149937de202f121dc7d0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:33:14 +0200 Subject: [PATCH 042/264] [rtlnow] Skip test on travis --- youtube_dl/extractor/rtlnow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 580f9e6d5..d1b08c9bc 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -86,8 +86,10 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, + u'skip': u'Only works from Germany', }] + def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) From 466880f53106a2604235d82f38a9f2f82827c268 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:34:12 +0200 Subject: [PATCH 043/264] [yahoo] Do not try to run rtmpdump on travis --- youtube_dl/extractor/yahoo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 39126e631..e6d5d7969 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -22,6 +22,10 @@ class YahooIE(InfoExtractor): u'title': u'Julian Smith & Travis Legg Watch Julian Smith', u'description': u'Julian and Travis watch Julian Smith', }, + u'params': { + # Requires rtmpdump + u'skip_download': True, + }, }, { u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', From 7f8ae73a5d866d52989f2d47d200476e572ca185 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:36:04 +0200 Subject: [PATCH 044/264] Include length in player cache ID Some videos use the same player with IDs of multiple lengths. See https://travis-ci.org/rg3/youtube-dl/jobs/12126506#L319 for an example. 
--- youtube_dl/extractor/youtube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 39ff33290..1101011ea 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1037,12 +1037,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if player_url is not None: try: - if player_url not in self._player_cache: + player_id = (player_url, len(s)) + if player_id not in self._player_cache: func = self._extract_signature_function( video_id, player_url, len(s) ) - self._player_cache[player_url] = func - func = self._player_cache[player_url] + self._player_cache[player_id] = func + func = self._player_cache[player_id] if self._downloader.params.get('youtube_print_sig_code'): self._print_sig_code(func, len(s)) return func(s) From ba2d9f213e3bc9ea0c65e7715702d2e89964dbe7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:38:56 +0200 Subject: [PATCH 045/264] [jeuxvideo] fix video file md5sum --- youtube_dl/extractor/jeuxvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 4327bc13d..ae2e37a70 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -12,7 +12,7 @@ class JeuxVideoIE(InfoExtractor): _TEST = { u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', u'file': u'5182.mp4', - u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0', + u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee', u'info_dict': { u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité', u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', From cd214418f611f7071417df0063d115ea911705a3 Mon Sep 17 
00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:41:57 +0200 Subject: [PATCH 046/264] [redtube] pep8 --- youtube_dl/extractor/redtube.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 1d2cf1f56..bb19b898a 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -14,24 +14,25 @@ class RedTubeIE(InfoExtractor): } } - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - video_extension = 'mp4' + video_extension = 'mp4' webpage = self._download_webpage(url, video_id) self.report_extraction(video_id) - video_url = self._html_search_regex(r'', - webpage, u'video URL') + video_url = self._html_search_regex( + r'', webpage, u'video URL') - video_title = self._html_search_regex('

(.+?)

', + video_title = self._html_search_regex( + r'

(.+?)

', webpage, u'title') - return [{ + return { 'id': video_id, 'url': video_url, 'ext': video_extension, 'title': video_title, - }] + } From 5e2a60db4a383ebe87c561234e79d55df1b8685e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:44:02 +0200 Subject: [PATCH 047/264] [yahoo] Fix test title --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e6d5d7969..dfedf5ecb 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -31,7 +31,7 @@ class YahooIE(InfoExtractor): u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', u'file': u'103000935.flv', u'info_dict': { - u'title': u'The Cougar Lies with Spanish Moss', + u'title': u'Codefellas - The Cougar Lies with Spanish Moss', u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? 
And if your waffles have nothing to hide, what are they so worried about?', }, u'params': { From 17ad2b3fb192d8e13a39971c45248017cb279ab3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:44:56 +0200 Subject: [PATCH 048/264] [yahoo] Switch ext of test --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index dfedf5ecb..5bdd5d591 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,7 +17,7 @@ class YahooIE(InfoExtractor): _TESTS = [ { u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - u'file': u'214727115.mp4', + u'file': u'214727115.flv', u'info_dict': { u'title': u'Julian Smith & Travis Legg Watch Julian Smith', u'description': u'Julian and Travis watch Julian Smith', From 46e28a84caae4820c7352232b048ecb3cbcd4012 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 4 Oct 2013 11:53:49 +0200 Subject: [PATCH 049/264] [brightcove] Fix up some broken HTML (#1553) --- youtube_dl/extractor/brightcove.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 558b3d009..745212f2f 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -49,6 +49,11 @@ class BrightcoveIE(InfoExtractor): Build a Brightcove url from a xml string containing {params} """ + + # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 + object_str = re.sub(r'(', + lambda m: m.group(1) + '/>', object_str) + object_doc = xml.etree.ElementTree.fromstring(object_str) assert u'BrightcoveExperience' in object_doc.attrib['class'] params = {'flashID': object_doc.attrib['id'], From c3fef636b55f30f58a1bd9c0c2a80e20365eee7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 4 Oct 2013 14:07:29 +0200 Subject: [PATCH 050/264] 
[dailymotion] Fix playlist extraction The html code has changed, make the video ids extraction more solid. --- youtube_dl/extractor/dailymotion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 259806f38..7d8353946 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -10,6 +10,7 @@ from ..utils import ( compat_str, get_element_by_attribute, get_element_by_id, + orderedSet, ExtractorError, ) @@ -158,12 +159,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): id, u'Downloading page %s' % pagenum) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) - video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) + video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') - for video_id in video_ids] + for video_id in orderedSet(video_ids)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 400afddaf49353c9b4c31d17d5efe4045e500fec Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 5 Oct 2013 09:37:11 +0200 Subject: [PATCH 051/264] Add CinemassacreIE --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cinemassacre.py | 100 +++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 youtube_dl/extractor/cinemassacre.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d1b7e5f99..db30edc27 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -12,6 +12,7 @@ from .brightcove import BrightcoveIE from .c56 import C56IE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .cinemassacre import CinemassacreIE from .cnn import CNNIE from .collegehumor import CollegeHumorIE from 
.comedycentral import ComedyCentralIE diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py new file mode 100644 index 000000000..16eaff3a1 --- /dev/null +++ b/youtube_dl/extractor/cinemassacre.py @@ -0,0 +1,100 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) + +class CinemassacreIE(InfoExtractor): + """Information Extractor for Cinemassacre""" + _VALID_URL = r'(?:http://)?(?:www\.)?(?Pcinemassacre\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/.+?)(?:[/?].*)?' + _TESTS = [{ + u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', + u'file': u'19911.mp4', + u'info_dict': { + u'upload_date': u'20121110', + u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', + #u'description': u'“Angry Video Game Nerd: The Movie” is...', # Description is too long + }, + u'params': { + u'skip_download': True, + }, + }] + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = u'http://' + mobj.group('url') + webpage = self._download_webpage(webpage_url, None) # Don't know video id yet + video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') + video_id = self._html_search_regex(r'src="http://player\.screenwavemedia\.com/play/embed\.php\?id=(?P.+?)"', + webpage, u'video_id') + video_title = self._html_search_regex(r'

(?P.+?)</h1>[^<]*</div>', + webpage, u'title') + video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', + webpage, u'description', flags=re.DOTALL, fatal=False) + + playerdata_url = u'http://player.screenwavemedia.com/play/player.php?id=' + video_id + playerdata = self._download_webpage(playerdata_url, video_id) + base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})/vod\'', + playerdata, u'base_url') + base_url += '/Cinemassacre/' + # The file names in playerdata are wrong for some videos??? + sd_file = 'Cinemassacre-%s_high.mp4' % video_id + hd_file = 'Cinemassacre-%s.mp4' % video_id + video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id + + formats = [{ + 'id': video_id, + 'url': base_url + hd_file, + 'format': 'hd', + 'ext': 'mp4', + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + }, + { + 'id': video_id, + 'url': base_url + sd_file, + 'ext': 'mp4', + 'format': 'sd', + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + }] + + if self._downloader.params.get('listformats', None): + self._print_formats(formats) + return + + req_format = self._downloader.params.get('format', 'best') + self.to_screen(u'Format: %s' % req_format) + + if req_format is None or req_format == 'best': + return [formats[0]] + elif req_format == 'worst': + return [formats[-1]] + elif req_format in ('-1', 'all'): + return formats + else: + format = self._specific( req_format, formats ) + if format is None: + raise ExtractorError(u'Requested format not available') + return [format] + + def _print_formats(self, formats): + """Print all available formats""" + print(u'Available formats:') + print(u'ext\t\tformat') + print(u'---------------------------------') + for format in formats: + 
print(u'%s\t\t%s' % (format['ext'], format['format'])) + + def _specific(self, req_format, formats): + for x in formats: + if x["format"] == req_format: + return x + return None From 1ece880d7c94d9b966f52855949aae6c0f37a140 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sat, 5 Oct 2013 13:36:13 +0200 Subject: [PATCH 052/264] [CinemassacreIE] Add support for other embed methods --- youtube_dl/extractor/cinemassacre.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 16eaff3a1..f0629ee93 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -20,6 +20,17 @@ class CinemassacreIE(InfoExtractor): u'params': { u'skip_download': True, }, + }, + { + u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', + u'file': u'521be8ef82b16.mp4', + u'info_dict': { + u'upload_date': u'20131002', + u'title': u'The Mummy’s Hand (1940)', + }, + u'params': { + u'skip_download': True, + }, }] def _real_extract(self,url): @@ -28,19 +39,24 @@ class CinemassacreIE(InfoExtractor): webpage_url = u'http://' + mobj.group('url') webpage = self._download_webpage(webpage_url, None) # Don't know video id yet video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') - video_id = self._html_search_regex(r'src="http://player\.screenwavemedia\.com/play/embed\.php\?id=(?P<video_id>.+?)"', - webpage, u'video_id') - video_title = self._html_search_regex(r'<h1 class="entry-title">(?P<title>.+?)</h1>[^<]*</div>', + mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) + if not mobj: + raise ExtractorError(u'Can\'t extract embed url and video id') + playerdata_url = mobj.group(u'embed_url') + video_id = mobj.group(u'video_id') + + video_title = 
self._html_search_regex(r'<title>(?P<title>.+?)\|', webpage, u'title') video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', webpage, u'description', flags=re.DOTALL, fatal=False) + if len(video_description) == 0: + video_description = None - playerdata_url = u'http://player.screenwavemedia.com/play/player.php?id=' + video_id playerdata = self._download_webpage(playerdata_url, video_id) - base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})/vod\'', + base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})/(?:vod|Cinemassacre)\'', playerdata, u'base_url') base_url += '/Cinemassacre/' - # The file names in playerdata are wrong for some videos??? + # Important: The file names in playerdata are not used by the player and even wrong for some videos sd_file = 'Cinemassacre-%s_high.mp4' % video_id hd_file = 'Cinemassacre-%s.mp4' % video_id video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id From 91a26ca559d225307d1bcaac74a5ca499748adc5 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sat, 5 Oct 2013 13:40:05 +0200 Subject: [PATCH 053/264] [CinemassacreIE] Remove docstring from class --- youtube_dl/extractor/cinemassacre.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index f0629ee93..181f57e76 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -7,7 +7,6 @@ from ..utils import ( ) class CinemassacreIE(InfoExtractor): - """Information Extractor for Cinemassacre""" _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' 
_TESTS = [{ u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', From ca215e0a4fdf42cba913f5b21d0e9e0e46814102 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sat, 5 Oct 2013 13:42:17 +0200 Subject: [PATCH 054/264] [CinemassacreIE] Use MD5 to check in TEST description --- youtube_dl/extractor/cinemassacre.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 181f57e76..17a7916cb 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -14,7 +14,7 @@ class CinemassacreIE(InfoExtractor): u'info_dict': { u'upload_date': u'20121110', u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', - #u'description': u'“Angry Video Game Nerd: The Movie” is...', # Description is too long + u'description': u'md5:fb87405fcb42a331742a0dce2708560b', }, u'params': { u'skip_download': True, From 97dae9ae07a1ecd58a412a15162826fde604db60 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 5 Oct 2013 16:12:21 +0200 Subject: [PATCH 055/264] [bliptv] Make sure video ID is a string --- youtube_dl/extractor/bliptv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 08b28c994..493504f75 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor): ext = umobj.group(1) info = { - 'id': data['item_id'], + 'id': compat_str(data['item_id']), 'url': video_url, 'uploader': data['display_name'], 'upload_date': upload_date, From e94b783c741b720ab4ee70eb7fc8764be89d63d5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 5 Oct 2013 16:38:33 +0200 Subject: [PATCH 056/264] [googleplus] Fix upload_date detection --- youtube_dl/extractor/googleplus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff 
--git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 8895ad289..ab12d7e93 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -41,7 +41,8 @@ class GooglePlusIE(InfoExtractor): # Extract update date upload_date = self._html_search_regex( - ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'], + r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*> + ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', webpage, u'upload date', fatal=False) if upload_date: # Convert timestring to a format suitable for filename From 00fcc17aeeab11ce694699bf183d33a3af75aab6 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Sat, 5 Oct 2013 15:55:58 -0400 Subject: [PATCH 057/264] add capability to suppress expected warnings in tests --- test/helper.py | 15 +++++++++++++++ test/test_dailymotion_subtitles.py | 5 +++-- test/test_youtube_subtitles.py | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/test/helper.py b/test/helper.py index a2b468b50..63f56841f 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,6 +1,8 @@ import io import json import os.path +import re +import types import youtube_dl.extractor from youtube_dl import YoutubeDL, YoutubeDLHandler @@ -32,6 +34,19 @@ class FakeYDL(YoutubeDL): raise Exception(s) def download(self, x): self.result.append(x) + # def expect_warning(self, regex): + # # Silence an expected warning matching a regex + # def report_warning(self, message): + # if re.match(regex, message): return + # super(FakeYDL, self).report_warning(regex) + # self.report_warning = types.MethodType(report_warning, self) + def expect_warning(self, regex): + # Silence an expected warning matching a regex + old_report_warning = self.report_warning + def report_warning(self, message): + if re.match(regex, message): return + old_report_warning(message) + self.report_warning = types.MethodType(report_warning, self) def get_testcases(): for ie in 
youtube_dl.extractor.gen_extractors(): diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index 83c65d57e..ed2ad311d 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -2,8 +2,6 @@ import sys import unittest -import json -import io import hashlib # Allow direct execution @@ -45,15 +43,18 @@ class TestDailymotionSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 5) def test_list_subtitles(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) def test_automatic_captions(self): + self.DL.expect_warning(u'Automatic Captions not supported by this server') self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslang'] = ['en'] subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) == 0) def test_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 168e6c66c..f9b0c1ad0 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -2,8 +2,6 @@ import sys import unittest -import json -import io import hashlib # Allow direct execution @@ -56,6 +54,7 @@ class TestYoutubeSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') def test_youtube_list_subtitles(self): + self.DL.expect_warning(u'Video doesn\'t have automatic captions') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) @@ -66,6 +65,7 @@ class TestYoutubeSubtitles(unittest.TestCase): subtitles = 
self.getSubtitles() self.assertTrue(subtitles['it'] is not None) def test_youtube_nosubtitles(self): + self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'sAjKT8FhjI8' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True From 79cfb46d42cf0cd296acf7f0689d2ad4b2e7f971 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Sat, 5 Oct 2013 16:08:48 -0400 Subject: [PATCH 058/264] add tox configuration file for easy testing --- .gitignore | 1 + tox.ini | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 24fdb3626..7dd0ad09b 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ updates_key.pem *.mp4 *.part test/testdata +.tox diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..53b461fdb --- /dev/null +++ b/tox.ini @@ -0,0 +1,5 @@ +[tox] +envlist = py26,py27,py33 +[testenv] +deps = nose +commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test From 8932a66e49dda60bdb6ddb1447df63fea5c4f320 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Sat, 5 Oct 2013 16:38:37 -0400 Subject: [PATCH 059/264] [fixup] remove unnecessary commented function --- test/helper.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/helper.py b/test/helper.py index 63f56841f..8e641e3cb 100644 --- a/test/helper.py +++ b/test/helper.py @@ -34,12 +34,6 @@ class FakeYDL(YoutubeDL): raise Exception(s) def download(self, x): self.result.append(x) - # def expect_warning(self, regex): - # # Silence an expected warning matching a regex - # def report_warning(self, message): - # if re.match(regex, message): return - # super(FakeYDL, self).report_warning(regex) - # self.report_warning = types.MethodType(report_warning, self) def expect_warning(self, regex): # Silence an expected warning matching a regex old_report_warning = self.report_warning From 
c1c9a79c49e8656f3244744e6f4e336e47a03206 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 04:27:09 +0200 Subject: [PATCH 060/264] Add basic --download-archive option Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time. When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped. --- youtube_dl/YoutubeDL.py | 32 ++++++++++++++ youtube_dl/__init__.py | 4 ++ youtube_dl/utils.py | 96 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..1f5f75e30 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -84,6 +85,9 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + downloadarchive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. 
The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +313,9 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + if self.in_download_archive(info_dict): + return (u'%(title)s) has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +585,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +626,26 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..a680d7c55 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None): selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) 
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--download-archive', metavar='FILE', + dest='download_archive', + help='Download only videos not present in the archive file. Record all downloaded videos in it.') authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'download_archive': opts.download_archive, }) if opts.verbose: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99..a463049a4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -830,3 +830,99 @@ def get_cachedir(params={}): cache_root = os.environ.get('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) return params.get('cachedir', os.path.join(cache_root, 'youtube-dl')) + + +# Cross-platform file locking +if sys.platform == 'win32': + import ctypes.wintypes + import msvcrt + + class OVERLAPPED(ctypes.Structure): + _fields_ = [ + ('Internal', ctypes.wintypes.LPVOID), + ('InternalHigh', ctypes.wintypes.LPVOID), + ('Offset', ctypes.wintypes.DWORD), + ('OffsetHigh', ctypes.wintypes.DWORD), + ('hEvent', ctypes.wintypes.HANDLE), + ] + + kernel32 = ctypes.windll.kernel32 + LockFileEx = kernel32.LockFileEx + LockFileEx.argtypes = [ + ctypes.wintypes.HANDLE, # hFile + ctypes.wintypes.DWORD, # dwFlags + ctypes.wintypes.DWORD, # dwReserved + ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow + ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh + ctypes.POINTER(OVERLAPPED) # Overlapped + ] + LockFileEx.restype = ctypes.wintypes.BOOL + UnlockFileEx = kernel32.UnlockFileEx + UnlockFileEx.argtypes = [ + ctypes.wintypes.HANDLE, # hFile + ctypes.wintypes.DWORD, # dwReserved + ctypes.wintypes.DWORD, # 
nNumberOfBytesToLockLow + ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh + ctypes.POINTER(OVERLAPPED) # Overlapped + ] + UnlockFileEx.restype = ctypes.wintypes.BOOL + whole_low = 0xffffffff + whole_high = 0x7fffffff + + def _lock_file(f, exclusive): + overlapped = OVERLAPPED() + overlapped.Offset = 0 + overlapped.OffsetHigh = 0 + overlapped.hEvent = 0 + f._lock_file_overlapped_p = ctypes.pointer(overlapped) + handle = msvcrt.get_osfhandle(f.fileno()) + if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0, + whole_low, whole_high, f._lock_file_overlapped_p): + raise OSError('Locking file failed: %r' % ctypes.FormatError()) + + def _unlock_file(f): + assert f._lock_file_overlapped_p + handle = msvcrt.get_osfhandle(f.fileno()) + if not UnlockFileEx(handle, 0, + whole_low, whole_high, f._lock_file_overlapped_p): + raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) + +else: + import fcntl + + def _lock_file(f, exclusive): + fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) + + def _unlock_file(f): + fcntl.lockf(f, fcntl.LOCK_UN) + + +class locked_file(object): + def __init__(self, filename, mode, encoding=None): + assert mode in ['r', 'a', 'w'] + self.f = io.open(filename, mode, encoding=encoding) + self.mode = mode + + def __enter__(self): + exclusive = self.mode != 'r' + try: + _lock_file(self.f, exclusive) + except IOError: + self.f.close() + raise + return self + + def __exit__(self, etype, value, traceback): + try: + _unlock_file(self.f) + finally: + self.f.close() + + def __iter__(self): + return iter(self.f) + + def write(self, *args): + return self.f.write(*args) + + def read(self, *args): + return self.f.read(*args) From f4aac741d5c98c4350dda478fa4564144d99d13a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 05:47:17 +0200 Subject: [PATCH 061/264] Move try_rm to test helpers --- test/helper.py | 22 +++++++++++++++++++--- test/test_download.py | 23 +++++++---------------- 2 files 
changed, 26 insertions(+), 19 deletions(-) diff --git a/test/helper.py b/test/helper.py index 8e641e3cb..884cf32dc 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,3 +1,4 @@ +import errno import io import json import os.path @@ -22,18 +23,33 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) + +def try_rm(filename): + """ Remove a file if it exists """ + try: + os.remove(filename) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + class FakeYDL(YoutubeDL): def __init__(self): - self.result = [] # Different instances of the downloader can't share the same dictionary # some test set the "sublang" parameter, which would break the md5 checks. - self.params = dict(parameters) - def to_screen(self, s): + params = dict(parameters) + super(FakeYDL, self).__init__(params) + self.result = [] + + def to_screen(self, s, skip_eol=None): print(s) + def trouble(self, s, tb=None): raise Exception(s) + def download(self, x): self.result.append(x) + def expect_warning(self, regex): # Silence an expected warning matching a regex old_report_warning = self.report_warning diff --git a/test/test_download.py b/test/test_download.py index 23a66254d..23d3853c4 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import errno import hashlib import io import os @@ -28,14 +27,6 @@ opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, You compat_urllib_request.install_opener(opener) socket.setdefaulttimeout(10) -def _try_rm(filename): - """ Remove a file if it exists """ - try: - os.remove(filename) - except OSError as ose: - if ose.errno != errno.ENOENT: - raise - md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class YoutubeDL(youtube_dl.YoutubeDL): @@ -54,7 +45,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -from helper import 
get_testcases +from helper import get_testcases, try_rm defs = get_testcases() with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -97,9 +88,9 @@ def generator(test_case): test_cases = test_case.get('playlist', [test_case]) for tc in test_cases: - _try_rm(tc['file']) - _try_rm(tc['file'] + '.part') - _try_rm(tc['file'] + '.info.json') + try_rm(tc['file']) + try_rm(tc['file'] + '.part') + try_rm(tc['file'] + '.info.json') try: for retry in range(1, RETRIES + 1): try: @@ -145,9 +136,9 @@ def generator(test_case): self.assertTrue(key in info_dict.keys() and info_dict[key]) finally: for tc in test_cases: - _try_rm(tc['file']) - _try_rm(tc['file'] + '.part') - _try_rm(tc['file'] + '.info.json') + try_rm(tc['file']) + try_rm(tc['file'] + '.part') + try_rm(tc['file'] + '.info.json') return test_template From 8dbe9899a985a04690e467510c94c14f3314843b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 06:06:30 +0200 Subject: [PATCH 062/264] Allow users to specify an age limit (fixes #1545) With these changes, users can now restrict what videos are downloaded by the intended audience, by specifying their age with --age-limit YEARS . Add rudimentary support in youtube, pornotube, and youporn. 
--- test/test_age_restriction.py | 53 +++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 6 ++++ youtube_dl/__init__.py | 4 +++ youtube_dl/extractor/common.py | 10 ++++++ youtube_dl/extractor/pornotube.py | 4 ++- youtube_dl/extractor/youporn.py | 4 ++- youtube_dl/extractor/youtube.py | 3 +- 7 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 test/test_age_restriction.py diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 000000000..943f9a315 --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl import YoutubeDL +from helper import try_rm + + +def _download_restricted(url, filename, age): + """ Returns true iff the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + "outtmpl": "%(id)s.%(ext)s", + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = filename + '.info.json' + try_rm(json_filename) + ydl.download([url]) + res = os.path.exists(json_filename) + try_rm(json_filename) + return res + + +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + + def test_youporn(self): + self._assert_restricted( + 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', + '505835.mp4', 2, old_age=25) + + def test_pornotube(self): + self._assert_restricted( + 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing', + '1689755.flv', 13) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 2503fd09b..6258c141e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -84,6 +84,8 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +311,10 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_restriction', 0): + return u'Skipping "' + title + '" because it is age restricted' return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 03df835f2..7a399273a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None): selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', + help='download only videos suitable for the given age', + default=None, type=int) authentication.add_option('-u', '--username', @@ -631,6 +634,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': 
opts.youtube_print_sig_code, + 'age_limit': opts.age_limit, }) if opts.verbose: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 69cdcdc1b..2a5a85dc6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,7 @@ class InfoExtractor(object): view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen + age_limit: Age restriction for the video, as an integer (years) formats: A list of dictionaries for each format available, it must be ordered from worst to best quality. Potential fields: * url Mandatory. The URL of the video file @@ -318,6 +319,15 @@ class InfoExtractor(object): self._og_regex('video')], html, name, **kargs) + def _rta_search(self, html): + # See http://www.rtalabel.org/index.php?content=howtofaq#single + if re.search(r'(?ix)<meta\s+name="rating"\s+' + r' content="RTA-5042-1996-1400-1577-RTA"', + html): + return 18 + return 0 + + class SearchInfoExtractor(InfoExtractor): """ Base class for paged search queries extractors. 
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index add76a11e..9039dff5a 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor): VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) if upload_date: upload_date = unified_strdate(upload_date) + age_limit = self._rta_search(webpage) info = {'id': video_id, 'url': video_url, @@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor): 'upload_date': upload_date, 'title': video_title, 'ext': 'flv', - 'format': 'flv'} + 'format': 'flv', + 'age_restriction': age_limit} return [info] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index c85fd4b5a..e2860ec9d 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor): req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) + age_limit = self._rta_search(webpage) # Get JSON parameters json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') @@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor): 'ext': extension, 'format': format, 'thumbnail': thumbnail, - 'description': video_description + 'description': video_description, + 'age_restriction': age_limit, }) if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1101011ea..9bcd035bd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'description': video_description, 'player_url': player_url, 'subtitles': video_subtitles, - 'duration': video_duration + 'duration': video_duration, 
+ 'age_restriction': 18 if age_gate else 0, }) return results From 41e8bca4d0fa3e5284466da2497ef23e09678ccb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 07:12:47 +0200 Subject: [PATCH 063/264] [viddler] Add basic support (Fixes #1520) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/viddler.py | 64 ++++++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/viddler.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d1b7e5f99..2b054e1c9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -117,6 +117,7 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vevo import VevoIE from .vice import ViceIE +from .viddler import ViddlerIE from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py new file mode 100644 index 000000000..12c84a985 --- /dev/null +++ b/youtube_dl/extractor/viddler.py @@ -0,0 +1,64 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + + +class ViddlerIE(InfoExtractor): + _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)' + _TEST = { + u"url": u"http://www.viddler.com/v/43903784", + u'file': u'43903784.mp4', + u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', + u'info_dict': { + u"title": u"Video Made Easy", + u"uploader": u"viddler", + u"duration": 100.89, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + embed_url = mobj.group('domain') + u'/embed/' + video_id + webpage = self._download_webpage(embed_url, video_id) + + video_sources_code = self._search_regex( + r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') + video_sources = 
json.loads(video_sources_code.replace("'", '"')) + + formats = [{ + 'url': video_url, + 'format': format_id, + } for video_url, format_id in video_sources.items()] + + title = self._html_search_regex( + r"title\s*:\s*'([^']*)'", webpage, u'title') + uploader = self._html_search_regex( + r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) + duration_s = self._html_search_regex( + r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) + duration = float(duration_s) if duration_s else None + thumbnail = self._html_search_regex( + r"thumbnail\s*:\s*'([^']*)'", + webpage, u'thumbnail', fatal=False) + + info = { + '_type': 'video', + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'duration': duration, + 'formats': formats, + } + + # TODO: Remove when #980 has been merged + info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) + info.update(info['formats'][-1]) + + return info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5f9cde99..e3feb12bf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -175,7 +175,7 @@ def compat_ord(c): compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 7e5e8306fdc67d75a995f21f3316256433e2c890 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 07:13:14 +0200 Subject: [PATCH 064/264] release 2013.10.06 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e773e82da..08eda2197 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ 
-__version__ = '2013.10.04' +__version__ = '2013.10.06' From e484c81f0c0a6faf959037ac03b504e4794d72df Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 16:03:18 +0200 Subject: [PATCH 065/264] [generic] Clarify error messages --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 764070635..7060c6f92 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -117,7 +117,7 @@ class GenericIE(InfoExtractor): except ValueError: # since this is the last-resort InfoExtractor, if # this error is thrown, it'll be thrown here - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Failed to download URL: %s' % url) self.report_extraction(video_id) # Look for BrightCove: @@ -149,12 +149,12 @@ class GenericIE(InfoExtractor): # HTML5 video mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Unsupported URL: %s' % url) # It's possible that one of the regexes # matched, but returned an empty group: if mobj.group(1) is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Did not find a valid video URL at %s' % url) video_url = mobj.group(1) video_url = compat_urlparse.urljoin(url, video_url) From cfadd183c4a281e97fba89501d59ee344bcc5978 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 16:23:06 +0200 Subject: [PATCH 066/264] Call extracted property age_limit everywhere --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/pornotube.py | 2 +- youtube_dl/extractor/youporn.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6258c141e..9ada01bcc 100644 --- 
a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -313,7 +313,7 @@ class YoutubeDL(object): return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) age_limit = self.params.get('age_limit') if age_limit is not None: - if age_limit < info_dict.get('age_restriction', 0): + if age_limit < info_dict.get('age_limit', 0): return u'Skipping "' + title + '" because it is age restricted' return None diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 9039dff5a..5d770ec28 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -47,6 +47,6 @@ class PornotubeIE(InfoExtractor): 'title': video_title, 'ext': 'flv', 'format': 'flv', - 'age_restriction': age_limit} + 'age_limit': age_limit} return [info] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index e2860ec9d..b1f93dd1b 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -117,7 +117,7 @@ class YouPornIE(InfoExtractor): 'format': format, 'thumbnail': thumbnail, 'description': video_description, - 'age_restriction': age_limit, + 'age_limit': age_limit, }) if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9bcd035bd..b02ae2572 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1496,7 +1496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'player_url': player_url, 'subtitles': video_subtitles, 'duration': video_duration, - 'age_restriction': 18 if age_gate else 0, + 'age_limit': 18 if age_gate else 0, }) return results From ee6c9f95e1e5cf118b0bdf6abc8376bd95bc7dcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 16:28:36 +0200 Subject: [PATCH 067/264] Remove superfluous parenthesis --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1f5f75e30..856e9ac92 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -314,7 +314,7 @@ class YoutubeDL(object): if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) if self.in_download_archive(info_dict): - return (u'%(title)s) has already been recorded in archive' + return (u'%(title)s has already been recorded in archive' % info_dict) return None From 1310bf247445148731dcfe1883b18fbf78795e9e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 6 Oct 2013 16:39:35 +0200 Subject: [PATCH 068/264] [redtube] add age_limit --- youtube_dl/extractor/redtube.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index bb19b898a..365aade56 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -30,9 +30,14 @@ class RedTubeIE(InfoExtractor): r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', webpage, u'title') + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + return { - 'id': video_id, - 'url': video_url, - 'ext': video_extension, - 'title': video_title, + 'id': video_id, + 'url': video_url, + 'ext': video_extension, + 'title': video_title, + 'age_limit': age_limit, } From ad7a071ab678d8ec5a2cee21efbf1a88a8ff8544 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sun, 6 Oct 2013 20:55:24 +0200 Subject: [PATCH 069/264] Only download 1 sec. 
with rtmpdump in test mode --- youtube_dl/FileDownloader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index d6673fd3a..f1ff0b520 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -267,7 +267,7 @@ class FileDownloader(object): self.to_screen(u'\r%s[download] 100%% of %s in %s' % (clear_line, data_len_str, self.format_seconds(tot_time))) - def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): + def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, test): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -291,6 +291,8 @@ class FileDownloader(object): basic_args += ['--playpath', play_path] if tc_url is not None: basic_args += ['--tcUrl', url] + if test: + basic_args += ['--stop', '1'] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: @@ -408,7 +410,8 @@ class FileDownloader(object): info_dict.get('player_url', None), info_dict.get('page_url', None), info_dict.get('play_path', None), - info_dict.get('tc_url', None)) + info_dict.get('tc_url', None), + self.params.get('test', False)) # Attempt to download using mplayer if url.startswith('mms') or url.startswith('rtsp'): From 387ae5f30b5490bf2ffcdcb1c9e07f0845934ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 6 Oct 2013 21:56:23 +0200 Subject: [PATCH 070/264] [vimeo] Recognize urls ending in a slash (fixes #1242) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4a7d82b7a..cea29f035 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor): 
"""Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$' + _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ From 8e4f824365543e394286742fcdb4c0a548becc8e Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sun, 6 Oct 2013 22:04:32 +0200 Subject: [PATCH 071/264] Remove test parameter from _download_with_rtmpdump --- youtube_dl/FileDownloader.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index f1ff0b520..2cda5d52a 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -267,7 +267,7 @@ class FileDownloader(object): self.to_screen(u'\r%s[download] 100%% of %s in %s' % (clear_line, data_len_str, self.format_seconds(tot_time))) - def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, test): + def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -291,7 +291,7 @@ class FileDownloader(object): basic_args += ['--playpath', play_path] if tc_url is not None: basic_args += ['--tcUrl', url] - if test: + if self.params.get('test', False): basic_args += ['--stop', '1'] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): @@ -410,8 +410,7 @@ class FileDownloader(object): info_dict.get('player_url', None), info_dict.get('page_url', None), info_dict.get('play_path', None), - 
info_dict.get('tc_url', None), - self.params.get('test', False)) + info_dict.get('tc_url', None)) # Attempt to download using mplayer if url.startswith('mms') or url.startswith('rtsp'): From 15870e90b0aa7fe73040936a2ef4e41cf5eed931 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 7 Oct 2013 12:21:24 +0200 Subject: [PATCH 072/264] Restore warning when user forgets to quote URL (#1396) --- youtube_dl/__init__.py | 2 ++ youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/youtube.py | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ba5206387..db4c58885 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -484,6 +484,8 @@ def _real_main(argv=None): if not ie._WORKING: continue desc = getattr(ie, 'IE_DESC', ie.IE_NAME) + if desc is False: + continue if hasattr(ie, 'SEARCH_KEY'): _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') _COUNTS = (u'', u'5', u'10', u'all') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2b054e1c9..c01de6b5e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -141,6 +141,7 @@ from .youtube import ( YoutubeShowIE, YoutubeSubscriptionsIE, YoutubeRecommendedIE, + YoutubeTruncatedURLIE, YoutubeWatchLaterIE, YoutubeFavouritesIE, ) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b02ae2572..35310b39f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1250,9 +1250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return url_map def _real_extract(self, url): - if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): - self._downloader.report_warning(u'Did you forget to quote the URL? 
Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).') - # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: @@ -1637,7 +1634,7 @@ class YoutubeChannelIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!watch(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' @@ -1830,3 +1827,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos') playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id') return self.url_result(playlist_id, 'YoutubePlaylist') + + +class YoutubeTruncatedURLIE(InfoExtractor): + IE_NAME = 'youtube:truncated_url' + IE_DESC = False # Do not list + _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$' + + def _real_extract(self, url): + raise ExtractorError( + u'Did you forget to quote the URL? 
Remember that & is a meta ' + u'character in most shells, so you want to put the URL in quotes, ' + u'like youtube-dl ' + u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' + u' (or simply youtube-dl BaW_jenozKc ).', + expected=True) From faa6ef6bc826c03b39db49ed5b4b76960e46970e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 7 Oct 2013 14:33:23 +0200 Subject: [PATCH 073/264] [jeuxvideo] Improve code quality (fixes #1567) --- youtube_dl/extractor/jeuxvideo.py | 37 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index ae2e37a70..6bb54b932 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -6,6 +6,7 @@ import xml.etree.ElementTree from .common import InfoExtractor + class JeuxVideoIE(InfoExtractor): _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' @@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = re.match(self._VALID_URL, url).group(1) webpage = self._download_webpage(url, title) - m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage) - - xml_link = m_download.group(1) + xml_link = self._html_search_regex( + r'<param name="flashvars" value="config=(.*?)" />', + webpage, u'config URL') - id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1) + video_id = self._search_regex( + r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', + xml_link, u'video ID') - xml_config = self._download_webpage(xml_link, title, - 'Downloading XML config') + xml_config = self._download_webpage( + xml_link, title, u'Downloading XML config') config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) - info = re.search(r'<format\.json>(.*?)</format\.json>', - xml_config, re.MULTILINE|re.DOTALL).group(1) - info = 
json.loads(info)['versions'][0] + info_json = self._search_regex( + r'(?sm)<format\.json>(.*?)</format\.json>', + xml_config, u'JSON information') + info = json.loads(info_json)['versions'][0] video_url = 'http://video720.jeuxvideo.com/' + info['file'] - return {'id': id, - 'title' : config.find('titre_video').text, - 'ext' : 'mp4', - 'url' : video_url, - 'description': self._og_search_description(webpage), - 'thumbnail': config.find('image').text, - } + return { + 'id': video_id, + 'title': config.find('titre_video').text, + 'ext': 'mp4', + 'url': video_url, + 'description': self._og_search_description(webpage), + 'thumbnail': config.find('image').text, + } From 4481a754e454eebb3688f048639c21890189681b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 7 Oct 2013 14:34:19 +0200 Subject: [PATCH 074/264] release 2013.10.07 --- README.md | 3 +++ youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 14d62b189..8824daee2 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like. --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date --no-playlist download only the currently playing video + --age-limit YEARS download only videos suitable for the given age + --download-archive FILE Download only videos not present in the archive + file. Record all downloaded videos in it. ## Download Options: -r, --rate-limit LIMIT maximum download rate (e.g. 
50k or 44.6m) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 08eda2197..8b4f03308 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.06' +__version__ = '2013.10.07' From a27b9e8bd592c880e65ab6bb3e15e1f5f8727cd8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 7 Oct 2013 18:50:26 +0200 Subject: [PATCH 075/264] Move opener setup into a separate helper function --- youtube_dl/__init__.py | 54 ++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index db4c58885..9594fd892 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -36,6 +36,7 @@ __authors__ = ( __license__ = 'Public Domain' import codecs +import collections import getpass import optparse import os @@ -447,27 +448,7 @@ def _real_main(argv=None): all_urls = batchurls + args all_urls = [url.strip() for url in all_urls] - # General configuration - cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) - if opts.proxy is not None: - if opts.proxy == '': - proxies = {} - else: - proxies = {'http': opts.proxy, 'https': opts.proxy} - else: - proxies = compat_urllib_request.getproxies() - # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) - if 'http' in proxies and 'https' not in proxies: - proxies['https'] = proxies['http'] - proxy_handler = compat_urllib_request.ProxyHandler(proxies) - https_handler = make_HTTPS_handler(opts) - opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) - # Delete the default user-agent header, which would otherwise apply in - # cases where our custom HTTP handler doesn't come into play - # (See https://github.com/rg3/youtube-dl/issues/1309 for details) - opener.addheaders =[] - compat_urllib_request.install_opener(opener) - socket.setdefaulttimeout(300) # 5 minutes should 
be enough (famous last words) + _setup_opener(jar=jar, opts=opts) extractors = gen_extractors() @@ -698,6 +679,37 @@ def _real_main(argv=None): sys.exit(retcode) + +def _setup_opener(jar=None, opts=None, timeout=300): + if opts is None: + FakeOptions = collections.namedtuple( + 'FakeOptions', ['proxy', 'no_check_certificate']) + opts = FakeOptions(proxy=None, no_check_certificate=False) + + cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) + if opts.proxy is not None: + if opts.proxy == '': + proxies = {} + else: + proxies = {'http': opts.proxy, 'https': opts.proxy} + else: + proxies = compat_urllib_request.getproxies() + # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) + if 'http' in proxies and 'https' not in proxies: + proxies['https'] = proxies['http'] + proxy_handler = compat_urllib_request.ProxyHandler(proxies) + https_handler = make_HTTPS_handler(opts) + opener = compat_urllib_request.build_opener( + https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) + # Delete the default user-agent header, which would otherwise apply in + # cases where our custom HTTP handler doesn't come into play + # (See https://github.com/rg3/youtube-dl/issues/1309 for details) + opener.addheaders = [] + compat_urllib_request.install_opener(opener) + socket.setdefaulttimeout(timeout) + return opener + + def main(argv=None): try: _real_main(argv) From b2ad967e4561f0bcb73fca3281341751471cab91 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 7 Oct 2013 19:06:36 +0200 Subject: [PATCH 076/264] Simplify test setup --- test/helper.py | 7 +------ test/test_download.py | 9 +-------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/test/helper.py b/test/helper.py index 884cf32dc..ad1b74dd3 100644 --- a/test/helper.py +++ b/test/helper.py @@ -12,12 +12,7 @@ from youtube_dl.utils import ( compat_urllib_request, ) -# General configuration (from __init__, not very elegant...) 
-jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) +youtube_dl._setup_opener(timeout=10) PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: diff --git a/test/test_download.py b/test/test_download.py index 23d3853c4..fdf59bb5c 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -19,14 +19,6 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para RETRIES = 3 -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) -socket.setdefaulttimeout(10) - md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class YoutubeDL(youtube_dl.YoutubeDL): @@ -45,6 +37,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() +import helper # Set up remaining global configuration from helper import get_testcases, try_rm defs = get_testcases() From 2ae3edb1cfa5b0afc2e6b6e2d4ea470dadf655a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 7 Oct 2013 21:10:31 +0200 Subject: [PATCH 077/264] Fix the printing of the proxy map in debug mode The proxies have to be extracted from the opener.handlers --- youtube_dl/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9594fd892..3ff78daac 100644 --- 
a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -448,7 +448,7 @@ def _real_main(argv=None): all_urls = batchurls + args all_urls = [url.strip() for url in all_urls] - _setup_opener(jar=jar, opts=opts) + opener = _setup_opener(jar=jar, opts=opts) extractors = gen_extractors() @@ -641,7 +641,12 @@ def _real_main(argv=None): except: pass write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') - write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') + + proxy_map = {} + for handler in opener.handlers: + if hasattr(handler, 'proxies'): + proxy_map.update(handler.proxies) + write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') ydl.add_default_info_extractors() From 88bd97e34c91a86dfe7dd01a9677b76ef43e1b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 8 Oct 2013 21:23:55 +0200 Subject: [PATCH 078/264] [vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. 
Extract all the available formats and set the 'formats' field of the result --- youtube_dl/extractor/vevo.py | 68 ++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 70408c4f0..1c1cc418d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,11 +1,15 @@ import re import json +import xml.etree.ElementTree +import datetime from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, ) + class VevoIE(InfoExtractor): """ Accepts urls from vevo.com or in the format 'vevo:{id}' @@ -15,11 +19,11 @@ class VevoIE(InfoExtractor): _TEST = { u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'file': u'GB1101300280.mp4', - u'md5': u'06bea460acb744eab74a9d7dcb4bfd61', u'info_dict': { u"upload_date": u"20130624", u"uploader": u"Hurts", - u"title": u"Somebody to Die For" + u"title": u"Somebody to Die For", + u'duration': 230, } } @@ -27,27 +31,47 @@ class VevoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - json_url = 'http://www.vevo.com/data/video/%s' % video_id - base_url = 'http://smil.lvl3.vevo.com' - videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower()) + json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls') self.report_extraction(video_id) - video_info = json.loads(info_json) - m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage)) - if m_urls is None or len(m_urls) == 0: - raise ExtractorError(u'Unable to extract video url') - # They are sorted from worst to best quality - m_url = m_urls[-1] - video_url = base_url + '/' + m_url.group('url') - ext = m_url.group('ext') + 
video_info = json.loads(info_json)['video'] + last_version = {'version': -1} + for version in video_info['videoVersions']: + # These are the HTTP downloads, other types are for different manifests + if version['sourceType'] == 2: + if version['version'] > last_version['version']: + last_version = version + if last_version['version'] == -1: + raise ExtractorError(u'Unable to extract last version of the video') - return {'url': video_url, - 'ext': ext, - 'id': video_id, - 'title': video_info['title'], - 'thumbnail': video_info['img'], - 'upload_date': video_info['launchDate'].replace('/',''), - 'uploader': video_info['Artists'][0]['title'], - } + renditions = xml.etree.ElementTree.fromstring(last_version['data']) + formats = [] + # Already sorted from worst to best quality + for rend in renditions.findall('rendition'): + attr = rend.attrib + f_url = attr['url'] + formats.append({ + 'url': f_url, + 'ext': determine_ext(f_url), + 'height': int(attr['frameheight']), + 'width': int(attr['frameWidth']), + }) + + date_epoch = int(self._search_regex( + r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000 + upload_date = datetime.datetime.fromtimestamp(date_epoch) + info = { + 'id': video_id, + 'title': video_info['title'], + 'formats': formats, + 'thumbnail': video_info['imageUrl'], + 'upload_date': upload_date.strftime('%Y%m%d'), + 'uploader': video_info['mainArtists'][0]['artistName'], + 'duration': video_info['duration'], + } + + # TODO: Remove when #980 has been merged + info.update(formats[-1]) + + return info From 1d368c7589908d9e810732f3c8aeecd24f3cce04 Mon Sep 17 00:00:00 2001 From: Tom <eales@live.com> Date: Wed, 9 Oct 2013 21:56:09 +0800 Subject: [PATCH 079/264] Tiny tpo --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 073a3837c..e85e03fa4 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -119,7 +119,7 @@ class YoutubeDL(object): 
and not params['restrictfilenames']): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( - u'Assuming --restrict-filenames isnce file system encoding ' + u'Assuming --restrict-filenames since file system encoding ' u'cannot encode all charactes. ' u'Set the LC_ALL environment variable to fix this.') params['restrictfilenames'] = True From a34c2faae4315f8c5ea6ef8ea2cc6dc063cb0149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 9 Oct 2013 16:41:36 +0200 Subject: [PATCH 080/264] [youtube] set the 'name' parameter in the subtitles url (fixes #1577) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 35310b39f..a7c514513 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1116,6 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'lang': lang, 'v': video_id, 'fmt': self._downloader.params.get('subtitlesformat'), + 'name': l[0], }) url = u'http://www.youtube.com/api/timedtext?' 
+ params sub_lang_list[lang] = url From 57da92b7df21137fc7c02d467365ae2189e0baed Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 9 Oct 2013 23:50:38 +0200 Subject: [PATCH 081/264] [youtube] Do not recognize attribution link as user (Fixes #1573) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a7c514513..8222a880f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1635,7 +1635,7 @@ class YoutubeChannelIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!watch(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' From 8f1ae18a181eb74f6e592a99774624b96a1c62d3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 9 Oct 2013 23:50:47 +0200 Subject: [PATCH 082/264] release 2013.10.09 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8b4f03308..1004af116 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.07' +__version__ = '2013.10.09' From 2e1fa03bf5b165e930dd68278360b53036326cd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 10 Oct 2013 15:25:11 +0200 Subject: [PATCH 083/264] Add an extractor for video.nhl.com (closes #1586) --- 
youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nhl.py | 59 ++++++++++++++++++++++++++++++++ youtube_dl/utils.py | 1 + 3 files changed, 61 insertions(+) create mode 100644 youtube_dl/extractor/nhl.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c01de6b5e..f44468d35 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -80,6 +80,7 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import NBCNewsIE from .newgrounds import NewgroundsIE +from .nhl import NHLIE from .ooyala import OoyalaIE from .orf import ORFIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py new file mode 100644 index 000000000..f86d9de7e --- /dev/null +++ b/youtube_dl/extractor/nhl.py @@ -0,0 +1,59 @@ +import re +import json +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + compat_urllib_parse, + determine_ext, + unified_strdate, +) + + +class NHLIE(InfoExtractor): + IE_NAME = u'nhl.com' + _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)' + + _TEST = { + u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', + u'file': u'453614.mp4', + u'info_dict': { + u'title': u'Quick clip: Weise 4-3 goal vs Flames', + u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.', + u'duration': 18, + u'upload_date': u'20131006', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id + info_json = self._download_webpage(json_url, video_id, + u'Downloading info json') + info_json = info_json.replace('\\\'', '\'') + info = json.loads(info_json)[0] + + initial_video_url = info['publishPoint'] + data = compat_urllib_parse.urlencode({ + 'type': 'fvod', + 
'path': initial_video_url.replace('.mp4', '_sd.mp4'), + }) + path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data + path_response = self._download_webpage(path_url, video_id, + u'Downloading final video url') + path_doc = xml.etree.ElementTree.fromstring(path_response) + video_url = path_doc.find('path').text + + join = compat_urlparse.urljoin + return { + 'id': video_id, + 'title': info['name'], + 'url': video_url, + 'ext': determine_ext(video_url), + 'description': info['description'], + 'duration': int(info['duration']), + 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), + 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index de2654762..82a1daeb9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -715,6 +715,7 @@ def unified_strdate(date_str): '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M', '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S', ] for expression in format_expressions: try: From 4193a453c22bea044a0bfb204dfbc1374304a1a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 10 Oct 2013 16:18:02 +0200 Subject: [PATCH 084/264] Don't add extractors with IE_DESC set to False to the page of supported sites. 
--- devscripts/gh-pages/update-sites.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 33f242480..153e15c8a 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -16,10 +16,11 @@ def main(): ie_htmls = [] for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()): ie_html = '<b>{}</b>'.format(ie.IE_NAME) - try: + ie_desc = getattr(ie, 'IE_DESC', None) + if ie_desc is False: + continue + elif ie_desc is not None: ie_html += ': {}'.format(ie.IE_DESC) - except AttributeError: - pass if ie.working() == False: ie_html += ' (Currently broken)' ie_htmls.append('<li>{}</li>'.format(ie_html)) From 63da13e8291e2debce073aea63bcfb710c0f5f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 10 Oct 2013 19:37:17 +0200 Subject: [PATCH 085/264] Add an extractor for faz.net (closes #1582) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/faz.py | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 youtube_dl/extractor/faz.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f44468d35..a4d0c71ec 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -34,6 +34,7 @@ from .eighttracks import EightTracksIE from .escapist import EscapistIE from .exfm import ExfmIE from .facebook import FacebookIE +from .faz import FazIE from .fktv import ( FKTVIE, FKTVPosteckeIE, diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py new file mode 100644 index 000000000..deaa4ed2d --- /dev/null +++ b/youtube_dl/extractor/faz.py @@ -0,0 +1,60 @@ +# encoding: utf-8 +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + clean_html, + get_element_by_attribute, +) + 
+ +class FazIE(InfoExtractor): + IE_NAME = u'faz.net' + _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html' + + _TEST = { + u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', + u'file': u'12610585.mp4', + u'info_dict': { + u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', + u'description': u'md5:1453fbf9a0d041d985a47306192ea253', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + self.to_screen(video_id) + webpage = self._download_webpage(url, video_id) + config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, + u'config xml url') + config_xml = self._download_webpage(config_xml_url, video_id, + u'Downloading config xml') + config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) + + encodings = config.find('ENCODINGS') + formats = [] + for code in ['LOW', 'HIGH', 'HQ']: + encoding = encodings.find(code) + if encoding is None: + continue + encoding_url = encoding.find('FILENAME').text + formats.append({ + 'url': encoding_url, + 'ext': determine_ext(encoding_url), + 'format_id': code.lower(), + }) + + descr_html = get_element_by_attribute('class', 'Content Copy', webpage) + info = { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'description': clean_html(descr_html), + 'thumbnail': config.find('STILL/STILL_BIG').text, + } + # TODO: Remove when #980 has been merged + info.update(formats[-1]) + return info From 0ab4ff6378b40d35a0bd0e63c3bd9b837c4e6b74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 10 Oct 2013 19:53:44 +0200 Subject: [PATCH 086/264] [mtv] Strip the description There were some tabs and newlines added around the string. 
--- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 001a576a8..e520e2bb4 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -87,7 +87,7 @@ class MTVIE(InfoExtractor): description_node = itemdoc.find('description') if description_node is not None: - description = description_node.text + description = description_node.text.strip() else: description = None From 1cbb27b151cb3c2195a551726c05a8f156c5b8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 10 Oct 2013 19:55:09 +0200 Subject: [PATCH 087/264] [gamespot] Mark as broken (#1587) --- youtube_dl/extractor/gamespot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index cd3bbe65f..5edbf678a 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -8,6 +8,7 @@ from ..utils import ( ) class GameSpotIE(InfoExtractor): + _WORKING = False _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' 
_TEST = { u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", From cb354c8f6218ecd722b218a08935f7bd7eecabd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 10 Oct 2013 21:01:45 +0200 Subject: [PATCH 088/264] [yahoo] Download the info from another page The 'meta' field is not always in the video webpage --- youtube_dl/extractor/yahoo.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 5bdd5d591..464b498f5 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -50,6 +50,21 @@ class YahooIE(InfoExtractor): webpage, u'items', flags=re.MULTILINE) items = json.loads(items_json) info = items['mediaItems']['query']['results']['mediaObj'][0] + # The 'meta' field is not always in the video webpage, we request it + # from another page + long_id = info['id'] + query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' + ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id) + data = compat_urllib_parse.urlencode({ + 'q': query, + 'env': 'prod', + 'format': 'json', + }) + query_result_json = self._download_webpage( + 'http://video.query.yahoo.com/v1/public/yql?' + data, + video_id, u'Downloading video info') + query_result = json.loads(query_result_json) + info = query_result['query']['results']['mediaObj'][0] meta = info['meta'] formats = [] From bc4f29170f7fe1088f63fdc42f225656d3680c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 11 Oct 2013 11:19:09 +0200 Subject: [PATCH 089/264] Add a PostProcessor for adding metadata to the file (closes #1570) It currently sets the title, the date and the author values. 
--- youtube_dl/PostProcessor.py | 33 +++++++++++++++++++++++++++++++++ youtube_dl/__init__.py | 5 +++++ 2 files changed, 38 insertions(+) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 3ee1d3c58..fbf8a7f98 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -2,6 +2,7 @@ import os import subprocess import sys import time +import datetime from .utils import * @@ -467,3 +468,35 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return True, information + + +class FFmpegMetadataPP(FFmpegPostProcessor): + def run(self, info): + metadata = {} + if info.get('title') is not None: + metadata['title'] = info['title'] + if info.get('upload_date') is not None: + metadata['date'] = info['upload_date'] + if info.get('uploader') is not None: + metadata['artist'] = info['uploader'] + elif info.get('uploader_id') is not None: + metadata['artist'] = info['uploader_id'] + + if not metadata: + self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add') + return True, info + + filename = info['filepath'] + ext = os.path.splitext(filename)[1][1:] + temp_filename = filename + u'.temp' + + options = ['-c', 'copy'] + for (name, value) in metadata.items(): + options.extend(['-metadata', '%s="%s"' % (name, value)]) + options.extend(['-f', ext]) + + self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename) + self.run_ffmpeg(filename, temp_filename, options) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return True, info diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3ff78daac..3513d719f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -358,6 +358,8 @@ def parseOpts(overrideArguments=None): help='do not overwrite post-processed files; the post-processed files are overwritten by default') postproc.add_option('--embed-subs', 
action='store_true', dest='embedsubtitles', default=False, help='embed subtitles in the video (only for mp4 videos)') + postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, + help='add metadata to the files') parser.add_option_group(general) @@ -651,6 +653,9 @@ def _real_main(argv=None): ydl.add_default_info_extractors() # PostProcessors + # Add the metadata pp first, the other pps will copy it + if opts.addmetadata: + ydl.add_post_processor(FFmpegMetadataPP()) if opts.extractaudio: ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) if opts.recodevideo: From 9026dd3858050db071b15db90cd953f7ab3de6c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 11 Oct 2013 12:42:15 +0200 Subject: [PATCH 090/264] Make sure it only runs rtmpdump one time in test mode and return True if the download can be resumed --- youtube_dl/FileDownloader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 2cda5d52a..8ecabab1a 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -270,6 +270,7 @@ class FileDownloader(object): def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): self.report_destination(filename) tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) # Check for rtmpdump first try: @@ -291,7 +292,7 @@ class FileDownloader(object): basic_args += ['--playpath', play_path] if tc_url is not None: basic_args += ['--tcUrl', url] - if self.params.get('test', False): + if test: basic_args += ['--stop', '1'] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): @@ -302,7 +303,7 @@ class FileDownloader(object): 
shell_quote = repr self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args)) retval = subprocess.call(args) - while retval == 2 or retval == 1: + while (retval == 2 or retval == 1) and not test: prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) time.sleep(5.0) # This seems to be needed @@ -315,7 +316,7 @@ class FileDownloader(object): self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') retval = 0 break - if retval == 0: + if retval == 0 or (test and retval == 2): fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(u'\r[rtmpdump] %s bytes' % fsize) self.try_rename(tmpfilename, filename) From 91dbaef40692a68a53aa74858f538a5699bae9ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 11 Oct 2013 14:33:26 +0200 Subject: [PATCH 091/264] [nhl] Add an extractor for videocenter's categories (#1586) It downloads the last 12 videos. 
--- test/test_playlists.py | 10 +++ youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/nhl.py | 105 ++++++++++++++++++++++++------- 3 files changed, 94 insertions(+), 23 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index c33511333..de8bd298a 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -16,6 +16,7 @@ from youtube_dl.extractor import ( UstreamChannelIE, SoundcloudUserIE, LivestreamIE, + NHLVideocenterIE, ) from youtube_dl.utils import * @@ -74,5 +75,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], u'TEDCity2.0 (English)') self.assertTrue(len(result['entries']) >= 4) + def test_nhl_videocenter(self): + dl = FakeYDL() + ie = NHLVideocenterIE(dl) + result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'999') + self.assertEqual(result['title'], u'Highlights') + self.assertEqual(len(result['entries']), 12) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a4d0c71ec..688196869 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -81,7 +81,7 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import NBCNewsIE from .newgrounds import NewgroundsIE -from .nhl import NHLIE +from .nhl import NHLIE, NHLVideocenterIE from .ooyala import OoyalaIE from .orf import ORFIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index f86d9de7e..e8d43dd13 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -11,29 +11,14 @@ from ..utils import ( ) -class NHLIE(InfoExtractor): - IE_NAME = u'nhl.com' - _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)' +class NHLBaseInfoExtractor(InfoExtractor): + @staticmethod + def _fix_json(json_string): + return 
json_string.replace('\\\'', '\'') - _TEST = { - u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', - u'file': u'453614.mp4', - u'info_dict': { - u'title': u'Quick clip: Weise 4-3 goal vs Flames', - u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.', - u'duration': 18, - u'upload_date': u'20131006', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id - info_json = self._download_webpage(json_url, video_id, - u'Downloading info json') - info_json = info_json.replace('\\\'', '\'') - info = json.loads(info_json)[0] + def _extract_video(self, info): + video_id = info['id'] + self.report_extraction(video_id) initial_video_url = info['publishPoint'] data = compat_urllib_parse.urlencode({ @@ -57,3 +42,79 @@ class NHLIE(InfoExtractor): 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]), } + + +class NHLIE(NHLBaseInfoExtractor): + IE_NAME = u'nhl.com' + _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)' + + _TEST = { + u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', + u'file': u'453614.mp4', + u'info_dict': { + u'title': u'Quick clip: Weise 4-3 goal vs Flames', + u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.', + u'duration': 18, + u'upload_date': u'20131006', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id + info_json = self._download_webpage(json_url, video_id, + u'Downloading info json') + info_json = self._fix_json(info_json) + info = json.loads(info_json)[0] + return self._extract_video(info) + 
+ +class NHLVideocenterIE(NHLBaseInfoExtractor): + IE_NAME = u'nhl.com:videocenter' + IE_DESC = u'Download the first 12 videos from a videocenter category' + _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?' + + @classmethod + def suitable(cls, url): + if NHLIE.suitable(url): + return False + return super(NHLVideocenterIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + team = mobj.group('team') + webpage = self._download_webpage(url, team) + cat_id = self._search_regex( + [r'var defaultCatId = "(.+?)";', + r'{statusIndex:0,index:0,.*?id:(.*?),'], + webpage, u'category id') + playlist_title = self._html_search_regex( + r'\?catid=%s">(.*?)</a>' % cat_id, + webpage, u'playlist title', flags=re.DOTALL) + + data = compat_urllib_parse.urlencode({ + 'cid': cat_id, + # This is the default value + 'count': 12, + 'ptrs': 3, + 'format': 'json', + }) + path = '/videocenter/servlets/browse?' + data + request_url = compat_urlparse.urljoin(url, path) + response = self._download_webpage(request_url, playlist_title) + response = self._fix_json(response) + if not response.strip(): + self._downloader.report_warning(u'Got an empty reponse, trying ' + u'adding the "newvideos" parameter') + response = self._download_webpage(request_url + '&newvideos=true', + playlist_title) + response = self._fix_json(response) + videos = json.loads(response) + + return { + '_type': 'playlist', + 'title': playlist_title, + 'id': cat_id, + 'entries': [self._extract_video(i) for i in videos], + } From 3823342d9d0a2c50327aa3e1f85a7e8e1221b0bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 11 Oct 2013 16:33:31 +0200 Subject: [PATCH 092/264] [arte] Prepare for generic format support (#980) --- youtube_dl/extractor/arte.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git 
a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 69b3b0ad7..4707d7cca 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -109,17 +109,27 @@ class ArteTvIE(InfoExtractor): return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url formats = filter(_match_lang, formats) + # Some formats use the m3u8 protocol + formats = filter(lambda f: f['videoFormat'] != 'M3U8', formats) # We order the formats by quality formats = sorted(formats, key=lambda f: int(f['height'])) # Prefer videos without subtitles in the same language formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None) # Pick the best quality - format_info = formats[-1] - if format_info['mediaType'] == u'rtmp': - info_dict['url'] = format_info['streamer'] - info_dict['play_path'] = 'mp4:' + format_info['url'] - else: - info_dict['url'] = format_info['url'] + def _format(format_info): + info = {'ext': 'flv', + 'width': format_info.get('width'), + 'height': format_info.get('height'), + } + if format_info['mediaType'] == u'rtmp': + info['url'] = format_info['streamer'] + info['play_path'] = 'mp4:' + format_info['url'] + else: + info_dict['url'] = format_info['url'] + return info + info_dict['formats'] = [_format(f) for f in formats] + # TODO: Remove when #980 has been merged + info_dict.update(info_dict['formats'][-1]) return info_dict From dd82ffea0c3a0dcf67f8e9fca7226de3a2899425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 2 Jul 2013 10:08:58 +0200 Subject: [PATCH 093/264] Implement format selection in YoutubeDL Now the IEs can set a formats field in the info_dict, with the formats ordered from worst to best quality. 
It's a list of dicts with the following fields: * Mandatory: url and ext * Optional: format and format_id The format_id is used for choosing which formats have to be downloaded. Now a video result is processed by the method process_video_result. --- youtube_dl/YoutubeDL.py | 80 +++++++++++++++++++++++++++++++++++++---- youtube_dl/__init__.py | 2 +- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e85e03fa4..feb105861 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -385,13 +385,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': ie_result.update(extra_info) - if 'playlist' not in ie_result: - # It isn't part of a playlist - ie_result['playlist'] = None - ie_result['playlist_index'] = None - if download: - self.process_info(ie_result) - return ie_result + return self.process_video_result(ie_result) elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist @@ -449,6 +443,64 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def process_video_result(self, info_dict, download=True): + assert info_dict.get('_type', 'video') == 'video' + + if 'playlist' not in info_dict: + # It isn't part of a playlist + info_dict['playlist'] = None + info_dict['playlist_index'] = None + + # We now pick which formats have to be downloaded + if info_dict.get('formats') is None: + # There's only one format available + formats = [info_dict] + else: + formats = info_dict['formats'] + + # We check that all the formats have the format and format_id fields + for (i, format) in enumerate(formats): + if format.get('format') is None: + format['format'] = compat_str(i) + if format.get('format_id') is None: + format['format_id'] = compat_str(i) + + if self.params.get('listformats', None): + 
self.list_formats(info_dict) + return + + req_format = self.params.get('format', 'best') + formats_to_download = [] + if req_format == 'best' or req_format is None: + formats_to_download = [formats[-1]] + elif req_format == 'worst': + formats_to_download = [formats[0]] + # The -1 is for supporting YoutubeIE + elif req_format in ('-1', 'all'): + formats_to_download = formats + else: + # We can accept formats requestd in the format: 34/10/5, we pick + # the first that is availble, starting from left + req_formats = req_format.split('/') + for rf in req_formats: + matches = filter(lambda f:f['format_id'] == rf ,formats) + if matches: + formats_to_download = [matches[0]] + break + if not formats_to_download: + raise ExtractorError(u'requested format not available') + + if download: + if len(formats_to_download) > 1: + self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) + for format in formats_to_download: + new_info = dict(info_dict) + new_info.update(format) + self.process_info(new_info) + # We update the info dict with the best quality format (backwards compatibility) + info_dict.update(formats_to_download[-1]) + return info_dict + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -655,3 +707,17 @@ class YoutubeDL(object): vid_id = info_dict['extractor'] + u' ' + info_dict['id'] with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + u'\n') + + def list_formats(self, info_dict): + formats_s = [] + for format in info_dict.get('formats', [info_dict]): + formats_s.append("%s\t:\t%s\t[%s]" % (format['format_id'], + format['ext'], + format.get('format', '???'), + ) + ) + if len(formats_s) != 1: + formats_s[0] += ' (worst)' + formats_s[-1] += ' (best)' + formats_s = "\n".join(formats_s) + self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s)) diff --git a/youtube_dl/__init__.py 
b/youtube_dl/__init__.py index 3513d719f..bc8e97250 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -208,7 +208,7 @@ def parseOpts(overrideArguments=None): video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', + action='store', dest='format', metavar='FORMAT', default='best', help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') From 99e206d508646b183ef315da162147ed6fd75442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 8 Jul 2013 12:10:47 +0200 Subject: [PATCH 094/264] Implement the max quality option in YoutubeDL --- youtube_dl/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index feb105861..d88378dda 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -469,6 +469,10 @@ class YoutubeDL(object): self.list_formats(info_dict) return + format_limit = self.params.get('format_limit', None) + if format_limit: + formats = [f for f in formats if f['format_id'] <= format_limit] + req_format = self.params.get('format', 'best') formats_to_download = [] if req_format == 'best' or req_format is None: From 6ff000b888a3da702a894addd9f9824139fd8c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 13 Jul 2013 17:51:26 +0200 Subject: [PATCH 095/264] Do not handle format selection for IEs that already handle it --- youtube_dl/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d88378dda..c6235abd3 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -451,6 +451,11 @@ class 
YoutubeDL(object): info_dict['playlist'] = None info_dict['playlist_index'] = None + # This extractors handle format selection themselves + if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']: + self.process_info(info_dict) + return info_dict + # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available From 79819f58f2328cdb08272c55d01965cd8c6624ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 13 Jul 2013 18:19:37 +0200 Subject: [PATCH 096/264] Default 'format' field to {width}x{height} If width is None, use {height}p and if height is None, '???' --- youtube_dl/YoutubeDL.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c6235abd3..829a70ec9 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -466,9 +466,16 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for (i, format) in enumerate(formats): if format.get('format') is None: - format['format'] = compat_str(i) + if format.get('height') is not None: + if format.get('width') is not None: + format_desc = u'%sx%s' % (format['width'], format['height']) + else: + format_desc = u'%sp' % format['height'] + else: + format_desc = compat_str(i) + format['format'] = format_desc if format.get('format_id') is None: - format['format_id'] = compat_str(i) + format['format_id'] = '???' 
if self.params.get('listformats', None): self.list_formats(info_dict) From e028d0d1e3ffed0a323b41431dbbfc804aa9553e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 14 Jul 2013 17:24:18 +0200 Subject: [PATCH 097/264] Implement the prefer_free_formats in YoutubeDL --- test/test_YoutubeDL.py | 49 +++++++++++++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 9 ++++++++ 2 files changed, 58 insertions(+) create mode 100644 test/test_YoutubeDL.py diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py new file mode 100644 index 000000000..2b9fb92ee --- /dev/null +++ b/test/test_YoutubeDL.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from helper import FakeYDL, parameters + +class YDL(FakeYDL): + def __init__(self): + super(YDL, self).__init__() + self.downloaded_info_dicts = [] + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict) + +class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [{u'ext': u'webm', u'height': 460},{u'ext': u'mp4', u'height': 460}] + info_dict = {u'formats': formats, u'extractor': u'test'} + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [{u'ext': u'webm', u'height': 720},{u'ext': u'mp4',u'height': 1080}] + info_dict[u'formats'] = formats + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'mp4') + + # No prefer_free_formats => keep original formats order + ydl = YDL() + 
ydl.params['prefer_free_formats'] = False + formats = [{u'ext': u'webm', u'height': 720},{u'ext': u'flv',u'height': 720}] + info_dict[u'formats'] = formats + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'flv') + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 829a70ec9..e159aa336 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -484,6 +484,15 @@ class YoutubeDL(object): format_limit = self.params.get('format_limit', None) if format_limit: formats = [f for f in formats if f['format_id'] <= format_limit] + if self.params.get('prefer_free_formats'): + def _free_formats_key(f): + try: + ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext']) + except ValueError: + ext_ord = -1 + # We only compare the extension if they have the same height and width + return (f.get('height'), f.get('width'), ext_ord) + formats = sorted(formats, key=_free_formats_key) req_format = self.params.get('format', 'best') formats_to_download = [] From 8016c9229718080f5211b9f9da176992622b30e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 14 Jul 2013 17:31:52 +0200 Subject: [PATCH 098/264] Fix the default values of format_id and format --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e159aa336..a32e50772 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -472,10 +472,10 @@ class YoutubeDL(object): else: format_desc = u'%sp' % format['height'] else: - format_desc = compat_str(i) + format_desc = '???' format['format'] = format_desc if format.get('format_id') is None: - format['format_id'] = '???' 
+ format['format_id'] = compat_str(i) if self.params.get('listformats', None): self.list_formats(info_dict) From 8032e31f2dfcccd2a20bc028a6534ac9f89ee10a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 11 Oct 2013 20:36:50 +0200 Subject: [PATCH 099/264] Merge pull request #1558 from rzhxeo/cinemassacre Add support for http://cinemassacre.com --- youtube_dl/extractor/cinemassacre.py | 96 +++++++++++----------------- 1 file changed, 36 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 17a7916cb..6925b96c2 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -6,33 +6,36 @@ from ..utils import ( ExtractorError, ) + class CinemassacreIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' 
_TESTS = [{ u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - u'file': u'19911.mp4', + u'file': u'19911.flv', u'info_dict': { - u'upload_date': u'20121110', + u'upload_date': u'20121110', u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', u'description': u'md5:fb87405fcb42a331742a0dce2708560b', }, u'params': { + # rtmp download u'skip_download': True, }, }, { u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - u'file': u'521be8ef82b16.mp4', + u'file': u'521be8ef82b16.flv', u'info_dict': { - u'upload_date': u'20131002', + u'upload_date': u'20131002', u'title': u'The Mummy’s Hand (1940)', }, u'params': { + # rtmp download u'skip_download': True, }, }] - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) webpage_url = u'http://' + mobj.group('url') @@ -50,66 +53,39 @@ class CinemassacreIE(InfoExtractor): webpage, u'description', flags=re.DOTALL, fatal=False) if len(video_description) == 0: video_description = None - + playerdata = self._download_webpage(playerdata_url, video_id) - base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})/(?:vod|Cinemassacre)\'', + base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'', playerdata, u'base_url') base_url += '/Cinemassacre/' - # Important: The file names in playerdata are not used by the player and even wrong for some videos + # Important: The file names in playerdata are not used by the player and even wrong for some videos sd_file = 'Cinemassacre-%s_high.mp4' % video_id hd_file = 'Cinemassacre-%s.mp4' % video_id video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id - - formats = [{ - 'id': video_id, - 'url': base_url + hd_file, - 'format': 'hd', - 'ext': 'mp4', - 'title': video_title, + + formats = [ + { + 'url': base_url + sd_file, + 'ext': 'flv', + 'format': 
'sd', + 'format_id': 'sd', + }, + { + 'url': base_url + hd_file, + 'ext': 'flv', + 'format': 'hd', + 'format_id': 'hd', + }, + ] + + info = { + 'id': video_id, + 'title': video_title, + 'formats': formats, 'description': video_description, 'upload_date': video_date, - 'thumbnail': video_thumbnail, - }, - { - 'id': video_id, - 'url': base_url + sd_file, - 'ext': 'mp4', - 'format': 'sd', - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - }] - - if self._downloader.params.get('listformats', None): - self._print_formats(formats) - return - - req_format = self._downloader.params.get('format', 'best') - self.to_screen(u'Format: %s' % req_format) - - if req_format is None or req_format == 'best': - return [formats[0]] - elif req_format == 'worst': - return [formats[-1]] - elif req_format in ('-1', 'all'): - return formats - else: - format = self._specific( req_format, formats ) - if format is None: - raise ExtractorError(u'Requested format not available') - return [format] - - def _print_formats(self, formats): - """Print all available formats""" - print(u'Available formats:') - print(u'ext\t\tformat') - print(u'---------------------------------') - for format in formats: - print(u'%s\t\t%s' % (format['ext'], format['format'])) - - def _specific(self, req_format, formats): - for x in formats: - if x["format"] == req_format: - return x - return None + 'thumbnail': video_thumbnail, + } + # TODO: Remove when #980 has been merged + info.update(formats[-1]) + return info From 32835331496e0a77cf7b21f34b80b2ae6e9142a5 Mon Sep 17 00:00:00 2001 From: Joey Adams <joeyadams3.14159@gmail.com> Date: Fri, 11 Oct 2013 21:52:30 -0400 Subject: [PATCH 100/264] Fix Brightcove detection when another Flash object is on the page The regex used non-greedy match, but alas it failed on input like this: <object class="...> ... class="BrightcoveExperience" It captured two objects and the intervening HTML. 
This commit fixes this by not allowing a ">" to appear before BrightcoveExperience. Video in question: http://www.harpercollinschildrens.com/feature/petethecat/ --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7060c6f92..d48c84f8d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -121,7 +121,7 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) # Look for BrightCove: - m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) + m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) if m_brightcove is not None: self.to_screen(u'Brightcove video detected.') bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) From 0f6d12e43c0adbd362765aa6b6f54c67e034a247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 12 Oct 2013 13:29:02 +0200 Subject: [PATCH 101/264] Don't set the '-aq' option with the opus format (fixes #1263) --- youtube_dl/PostProcessor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index fbf8a7f98..07b6895c0 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -178,7 +178,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): extension = self._preferredcodec more_opts = [] if self._preferredquality is not None: - if int(self._preferredquality) < 10: + # The opus codec doesn't support the -aq option + if int(self._preferredquality) < 10 and extension != 'opus': more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] From 
4eb7f1d12e512fa69f90d98b2e6e97fa0c04e7a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 12 Oct 2013 13:49:27 +0200 Subject: [PATCH 102/264] FFmpegPostProcessor: print the command line used if the --verbose option is given --- youtube_dl/PostProcessor.py | 2 ++ youtube_dl/utils.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index fbf8a7f98..0479591f0 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -83,6 +83,8 @@ class FFmpegPostProcessor(PostProcessor): + opts + [encodeFilename(self._ffmpeg_filename_argument(out_path))]) + if self._downloader.params.get('verbose', False): + self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout,stderr = p.communicate() if p.returncode != 0: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 82a1daeb9..0457f3ded 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -9,6 +9,7 @@ import io import json import locale import os +import pipes import platform import re import socket @@ -927,3 +928,7 @@ class locked_file(object): def read(self, *args): return self.f.read(*args) + + +def shell_quote(args): + return ' '.join(map(pipes.quote, args)) From f5e54a1fda6fcc4ef279e54ff6cf63f6eae71bb0 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Sat, 12 Oct 2013 13:11:03 -0400 Subject: [PATCH 103/264] add support for NowVideo.ch --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nowvideo.py | 43 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/nowvideo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 226c3a762..bc191a012 100644 --- a/youtube_dl/extractor/__init__.py +++ 
b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .nba import NBAIE from .nbc import NBCNewsIE from .newgrounds import NewgroundsIE from .nhl import NHLIE, NHLVideocenterIE +from .nowvideo import NowVideoIE from .ooyala import OoyalaIE from .orf import ORFIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py new file mode 100644 index 000000000..ab52ad401 --- /dev/null +++ b/youtube_dl/extractor/nowvideo.py @@ -0,0 +1,43 @@ +import re + +from .common import InfoExtractor +from ..utils import compat_urlparse + + +class NowVideoIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.ch/video/(?P<id>\w+)' + _TEST = { + u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa', + u'file': u'0mw0yow7b6dxa.flv', + u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817', + u'info_dict': { + u"title": u"youtubedl test video _BaW_jenozKc.mp4" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://www.nowvideo.ch/video/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + self.report_extraction(video_id) + + video_title = self._html_search_regex(r'<h4>(.*)</h4>', + webpage, u'video title') + + video_key = self._search_regex(r'var fkzd="(.*)";', + webpage, u'video key') + + api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) + api_response = self._download_webpage(api_call, video_id, + u'Downloading API page') + video_url = compat_urlparse.parse_qs(api_response)[u'url'][0] + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': video_title, + }] From d3f46b9aa5727323182dd845030c9d781e1824fd Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Sat, 12 Oct 2013 13:17:11 -0400 Subject: [PATCH 104/264] Add support for single-test tox runs Use a sintax like tox 
test.test_download:TestDownload.test_NowVideo to run the specific test on all the tox environments (Python versions) --- test/__init__.py | 0 test/test_age_restriction.py | 2 +- test/test_all_urls.py | 2 +- test/test_dailymotion_subtitles.py | 2 +- test/test_download.py | 4 ++-- test/test_playlists.py | 2 +- test/test_youtube_lists.py | 2 +- test/test_youtube_subtitles.py | 2 +- tox.ini | 7 +++++-- 9 files changed, 13 insertions(+), 10 deletions(-) create mode 100644 test/__init__.py diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 943f9a315..ec3e30572 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -8,7 +8,7 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl import YoutubeDL -from helper import try_rm +from .helper import try_rm def _download_restricted(url, filename, age): diff --git a/test/test_all_urls.py b/test/test_all_urls.py index ff1c86efe..b28ad000b 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -8,7 +8,7 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors -from helper import get_testcases +from .helper import get_testcases class TestAllURLsMatching(unittest.TestCase): def setUp(self): diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index ed2ad311d..e655d280d 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -10,7 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import DailymotionIE from youtube_dl.utils import * -from helper import FakeYDL +from .helper import FakeYDL md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git 
a/test/test_download.py b/test/test_download.py index fdf59bb5c..68da4d984 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -37,8 +37,8 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -import helper # Set up remaining global configuration -from helper import get_testcases, try_rm +import test.helper as helper # Set up remaining global configuration +from .helper import get_testcases, try_rm defs = get_testcases() with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: diff --git a/test/test_playlists.py b/test/test_playlists.py index de8bd298a..108a4d63b 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -20,7 +20,7 @@ from youtube_dl.extractor import ( ) from youtube_dl.utils import * -from helper import FakeYDL +from .helper import FakeYDL class TestPlaylists(unittest.TestCase): def assertIsPlaylist(self, info): diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 53e65816d..0b5c79030 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -11,7 +11,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE from youtube_dl.utils import * -from helper import FakeYDL +from .helper import FakeYDL class TestYoutubeLists(unittest.TestCase): def assertIsPlaylist(self,info): diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index f9b0c1ad0..07850385e 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -10,7 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeIE from youtube_dl.utils import * -from helper import FakeYDL +from .helper import FakeYDL md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git a/tox.ini b/tox.ini index 53b461fdb..ed01e3386 100644 --- a/tox.ini +++ 
b/tox.ini @@ -1,5 +1,8 @@ [tox] envlist = py26,py27,py33 [testenv] -deps = nose -commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test +deps = + nose + coverage +commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html + # test.test_download:TestDownload.test_NowVideo From d7e66d39a040886f940f4adf444be71e50e97391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 12 Oct 2013 21:34:04 +0200 Subject: [PATCH 105/264] Add an extractor for internetvideoarchive.com videos It's used by videodetective.com --- test/test_utils.py | 14 ++++ youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/internetvideoarchive.py | 71 ++++++++++++++++++++ youtube_dl/utils.py | 13 ++++ 4 files changed, 99 insertions(+) create mode 100644 youtube_dl/extractor/internetvideoarchive.py diff --git a/test/test_utils.py b/test/test_utils.py index ff2e9885b..f2c03d421 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -20,6 +20,7 @@ from youtube_dl.utils import ( unified_strdate, find_xpath_attr, get_meta_content, + xpath_with_ns, ) if sys.version_info < (3, 0): @@ -141,5 +142,18 @@ class TestUtil(unittest.TestCase): self.assertEqual(get_meta('description'), u'foo & bar') self.assertEqual(get_meta('author'), 'Plato') + def test_xpath_with_ns(self): + testxml = u'''<root xmlns:media="http://example.com/"> + <media:song> + <media:author>The Author</media:author> + <url>http://server.com/download.mp3</url> + </media:song> + </root>''' + doc = xml.etree.ElementTree.fromstring(testxml) + find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) + self.assertTrue(find('media:song') is not None) + self.assertEqual(find('media:song/media:author').text, u'The Author') + self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + if __name__ == '__main__': unittest.main() diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bc191a012..e50a89149 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -62,6 +62,7 @@ from .ign import IGNIE, OneUPIE from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE +from .internetvideoarchive import InternetVideoArchiveIE from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py new file mode 100644 index 000000000..52e3f9eec --- /dev/null +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -0,0 +1,71 @@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + xpath_with_ns, + determine_ext, +) + + +class InternetVideoArchiveIE(InfoExtractor): + _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?' + + _TEST = { + u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247', + u'file': u'452693.mp4', + u'info_dict': { + u'title': u'SKYFALL', + u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', + u'duration': 156, + }, + } + + @staticmethod + def _build_url(query): + return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' 
+ query + + def _real_extract(self, url): + query = compat_urlparse.urlparse(url).query + query_dic = compat_urlparse.parse_qs(query) + video_id = query_dic['publishedid'][0] + url = self._build_url(query) + + flashconfiguration_xml = self._download_webpage(url, video_id, + u'Downloading flash configuration') + flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) + file_url = flashconfiguration.find('file').text + file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') + info_xml = self._download_webpage(file_url, video_id, + u'Downloading video info') + info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) + item = info.find('channel/item') + + def _bp(p): + return xpath_with_ns(p, + {'media': 'http://search.yahoo.com/mrss/', + 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'}) + formats = [] + for content in item.findall(_bp('media:group/media:content')): + attr = content.attrib + f_url = attr['url'] + formats.append({ + 'url': f_url, + 'ext': determine_ext(f_url), + 'width': int(attr['width']), + 'bitrate': int(attr['bitrate']), + }) + formats = sorted(formats, key=lambda f: f['bitrate']) + + info = { + 'id': video_id, + 'title': item.find('title').text, + 'formats': formats, + 'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'], + 'description': item.find('description').text, + 'duration': int(attr['duration']), + } + # TODO: Remove when #980 has been merged + info.update(formats[-1]) + return info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0457f3ded..3e81c308b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -230,6 +230,19 @@ else: return f return None +# On python2.6 the xml.etree.ElementTree.Element methods don't support +# the namespace parameter +def xpath_with_ns(path, ns_map): + components = [c.split(':') for c in path.split('/')] + replaced = [] + for c in components: + if len(c) == 1: + replaced.append(c[0]) + else: + ns, tag 
= c + replaced.append('{%s}%s' % (ns_map[ns], tag)) + return '/'.join(replaced) + def htmlentity_transform(matchobj): """Transforms an HTML entity to a character. From 3d60d33773e1be28955a74c3491edd13581aeb8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 12 Oct 2013 21:36:17 +0200 Subject: [PATCH 106/264] Add an extractor for videodetective.com (closes #262) It uses the internetvideoarchive.com platform. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videodetective.py | 30 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 youtube_dl/extractor/videodetective.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e50a89149..0f38bdd54 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -123,6 +123,7 @@ from .veoh import VeohIE from .vevo import VevoIE from .vice import ViceIE from .viddler import ViddlerIE +from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py new file mode 100644 index 000000000..265dd5b91 --- /dev/null +++ b/youtube_dl/extractor/videodetective.py @@ -0,0 +1,30 @@ +import re + +from .common import InfoExtractor +from .internetvideoarchive import InternetVideoArchiveIE +from ..utils import ( + compat_urlparse, +) + + +class VideoDetectiveIE(InfoExtractor): + _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', + u'file': u'194487.mp4', + u'info_dict': { + u'title': u'KICK-ASS 2', + u'description': u'md5:65ba37ad619165afac7d432eaded6013', + u'duration': 135, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = 
mobj.group('id') + webpage = self._download_webpage(url, video_id) + og_video = self._og_search_video_url(webpage) + query = compat_urlparse.urlparse(og_video).query + return self.url_result(InternetVideoArchiveIE._build_url(query), + ie=InternetVideoArchiveIE.ie_key()) From 4b7b839f24c6e95a4c1047de1a0a5194ef7f8fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 12 Oct 2013 22:21:23 +0200 Subject: [PATCH 107/264] Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/internetvideoarchive.py | 16 ++++++++++++++++ youtube_dl/extractor/rottentomatoes.py | 16 ++++++++++++++++ youtube_dl/extractor/videodetective.py | 2 +- 4 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/rottentomatoes.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0f38bdd54..9dc9651ad 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -94,6 +94,7 @@ from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE from .ro220 import Ro220IE +from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE from .sina import SinaIE diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 52e3f9eec..5986459d6 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -4,6 +4,7 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( compat_urlparse, + compat_urllib_parse, xpath_with_ns, determine_ext, ) @@ -26,6 +27,16 @@ class InternetVideoArchiveIE(InfoExtractor): def _build_url(query): return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' 
+ query + @staticmethod + def _clean_query(query): + NEEDED_ARGS = ['publishedid', 'customerid'] + query_dic = compat_urlparse.parse_qs(query) + cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS) + # Other player ids return m3u8 urls + cleaned_dic['playerid'] = '247' + cleaned_dic['videokbrate'] = '100000' + return compat_urllib_parse.urlencode(cleaned_dic) + def _real_extract(self, url): query = compat_urlparse.urlparse(url).query query_dic = compat_urlparse.parse_qs(query) @@ -37,6 +48,11 @@ class InternetVideoArchiveIE(InfoExtractor): flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8')) file_url = flashconfiguration.find('file').text file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') + # Replace some of the parameters in the query to get the best quality + # and http links (no m3u8 manifests) + file_url = re.sub(r'(?<=\?)(.+)$', + lambda m: self._clean_query(m.group()), + file_url) info_xml = self._download_webpage(file_url, video_id, u'Downloading video info') info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py new file mode 100644 index 000000000..c79c39413 --- /dev/null +++ b/youtube_dl/extractor/rottentomatoes.py @@ -0,0 +1,16 @@ +from .videodetective import VideoDetectiveIE + + +# It just uses the same method as videodetective.com, +# the internetvideoarchive.com is extracted from the og:video property +class RottenTomatoesIE(VideoDetectiveIE): + _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', + u'file': '613340.mp4', + u'info_dict': { + u'title': u'TOY STORY 3', + u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', + }, + } diff --git 
a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 265dd5b91..d89f84094 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor): u'info_dict': { u'title': u'KICK-ASS 2', u'description': u'md5:65ba37ad619165afac7d432eaded6013', - u'duration': 135, + u'duration': 138, }, } From c40f5cf45ce896c021ed44fa22d79adbb05eaf5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 13 Oct 2013 13:54:31 +0200 Subject: [PATCH 108/264] [arte] add an extractor for creative.arte.tv (#1593) The +7 videos now use an independent extractor that is also used for the creative videos --- youtube_dl/extractor/__init__.py | 6 +- youtube_dl/extractor/arte.py | 156 +++++++++++++++++-------------- 2 files changed, 93 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9dc9651ad..837c5834d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -2,7 +2,11 @@ from .appletrailers import AppleTrailersIE from .addanime import AddAnimeIE from .archiveorg import ArchiveOrgIE from .ard import ARDIE -from .arte import ArteTvIE +from .arte import ( + ArteTvIE, + ArteTVPlus7IE, + ArteTVCreativeIE, +) from .auengine import AUEngineIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 4707d7cca..d296b6d63 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -7,15 +7,14 @@ from ..utils import ( ExtractorError, find_xpath_attr, unified_strdate, + determine_ext, ) +# There are different sources of video in arte.tv, the extraction process +# is different for each one. The videos usually expire in 7 days, so we can't +# add tests. 
+ class ArteTvIE(InfoExtractor): - """ - There are two sources of video in arte.tv: videos.arte.tv and - www.arte.tv/guide, the extraction process is different for each one. - The videos expire in 7 days, so we can't add tests. - """ - _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' _LIVE_URL = r'index-[0-9]+\.html$' @@ -24,7 +23,7 @@ class ArteTvIE(InfoExtractor): @classmethod def suitable(cls, url): - return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL)) + return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL)) # TODO implement Live Stream # from ..utils import compat_urllib_parse @@ -55,14 +54,6 @@ class ArteTvIE(InfoExtractor): # video_url = u'%s/%s' % (info.get('url'), info.get('path')) def _real_extract(self, url): - mobj = re.match(self._EMISSION_URL, url) - if mobj is not None: - lang = mobj.group('lang') - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') - return self._extract_emission(url, video_id, lang) - mobj = re.match(self._VIDEOS_URL, url) if mobj is not None: id = mobj.group('id') @@ -80,59 +71,6 @@ class ArteTvIE(InfoExtractor): # self.extractLiveStream(url) # return - def _extract_emission(self, url, video_id, lang): - """Extract from www.arte.tv/guide""" - webpage = self._download_webpage(url, video_id) - json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') - - json_info = self._download_webpage(json_url, video_id, 'Downloading info json') - self.report_extraction(video_id) - info = json.loads(json_info) - player_info = info['videoJsonPlayer'] - - info_dict = {'id': player_info['VID'], - 'title': 
player_info['VTI'], - 'description': player_info.get('VDE'), - 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), - 'thumbnail': player_info['programImage'], - 'ext': 'flv', - } - - formats = player_info['VSR'].values() - def _match_lang(f): - # Return true if that format is in the language of the url - if lang == 'fr': - l = 'F' - elif lang == 'de': - l = 'A' - regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] - return any(re.match(r, f['versionCode']) for r in regexes) - # Some formats may not be in the same language as the url - formats = filter(_match_lang, formats) - # Some formats use the m3u8 protocol - formats = filter(lambda f: f['videoFormat'] != 'M3U8', formats) - # We order the formats by quality - formats = sorted(formats, key=lambda f: int(f['height'])) - # Prefer videos without subtitles in the same language - formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None) - # Pick the best quality - def _format(format_info): - info = {'ext': 'flv', - 'width': format_info.get('width'), - 'height': format_info.get('height'), - } - if format_info['mediaType'] == u'rtmp': - info['url'] = format_info['streamer'] - info['play_path'] = 'mp4:' + format_info['url'] - else: - info_dict['url'] = format_info['url'] - return info - info_dict['formats'] = [_format(f) for f in formats] - # TODO: Remove when #980 has been merged - info_dict.update(info_dict['formats'][-1]) - - return info_dict - def _extract_video(self, url, video_id, lang): """Extract from videos.arte.tv""" ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') @@ -182,3 +120,85 @@ class ArteTvIE(InfoExtractor): 'ext': 'flv', 'thumbnail': self._og_search_thumbnail(webpage), } + + +class ArteTVPlus7IE(InfoExtractor): + IE_NAME = u'arte.tv:+7' + _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' 
+ + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + lang = mobj.group('lang') + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + + json_info = self._download_webpage(json_url, video_id, 'Downloading info json') + self.report_extraction(video_id) + info = json.loads(json_info) + player_info = info['videoJsonPlayer'] + + info_dict = { + 'id': player_info['VID'], + 'title': player_info['VTI'], + 'description': player_info.get('VDE'), + 'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]), + 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), + } + + formats = player_info['VSR'].values() + def _match_lang(f): + if f.get('versionCode') is None: + return True + # Return true if that format is in the language of the url + if lang == 'fr': + l = 'F' + elif lang == 'de': + l = 'A' + regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] + return any(re.match(r, f['versionCode']) for r in regexes) + # Some formats may not be in the same language as the url + formats = filter(_match_lang, formats) + # Some formats use the m3u8 protocol + formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) + # We order the formats by quality + formats = sorted(formats, key=lambda f: int(f.get('height',-1))) + # Prefer videos without subtitles in the same language + formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None) + # Pick the best quality + def _format(format_info): + info = { + 'width': format_info.get('width'), + 'height': format_info.get('height'), + } + if format_info['mediaType'] == u'rtmp': + info['url'] = format_info['streamer'] + info['play_path'] = 'mp4:' + format_info['url'] + info['ext'] = 'flv' + else: + 
info['url'] = format_info['url'] + info['ext'] = determine_ext(info['url']) + return info + info_dict['formats'] = [_format(f) for f in formats] + # TODO: Remove when #980 has been merged + info_dict.update(info_dict['formats'][-1]) + + return info_dict + + +# It also uses the arte_vp_url url from the webpage to extract the information +class ArteTVCreativeIE(ArteTVPlus7IE): + IE_NAME = u'arte.tv:creative' + _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)' + + _TEST = { + u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + u'file': u'050489-002.mp4', + u'info_dict': { + u'title': u'Agentur Amateur #2 - Corporate Design', + }, + } + From 69a0c470b5cbcb789ef0358b7f13a18bf7564fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 13 Oct 2013 14:21:13 +0200 Subject: [PATCH 109/264] [arte] Add an extractor for future.arte.tv (closes #1593) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/arte.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 837c5834d..d76945a48 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .arte import ( ArteTvIE, ArteTVPlus7IE, ArteTVCreativeIE, + ArteTVFutureIE, ) from .auengine import AUEngineIE from .bandcamp import BandcampIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index d296b6d63..5ee8a67b1 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,3 +1,4 @@ +# encoding: utf-8 import re import json import xml.etree.ElementTree @@ -8,6 +9,7 @@ from ..utils import ( find_xpath_attr, unified_strdate, determine_ext, + get_element_by_id, ) # There are different sources of video in arte.tv, the extraction process @@ -126,14 +128,21 @@ class 
ArteTVPlus7IE(InfoExtractor): IE_NAME = u'arte.tv:+7' _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + @classmethod + def _extract_url_info(cls, url): + mobj = re.match(cls._VALID_URL, url) lang = mobj.group('lang') # This is not a real id, it can be for example AJT for the news # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal video_id = mobj.group('id') + return video_id, lang + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) webpage = self._download_webpage(url, video_id) + return self._extract_from_webpage(webpage, video_id, lang) + + def _extract_from_webpage(self, webpage, video_id, lang): json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') json_info = self._download_webpage(json_url, video_id, 'Downloading info json') @@ -202,3 +211,21 @@ class ArteTVCreativeIE(ArteTVPlus7IE): }, } + +class ArteTVFutureIE(ArteTVPlus7IE): + IE_NAME = u'arte.tv:future' + _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' + + _TEST = { + u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', + u'file': u'050940-003.mp4', + u'info_dict': { + u'title': u'Les champignons au secours de la planète', + }, + } + + def _real_extract(self, url): + anchor_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, anchor_id) + row = get_element_by_id(anchor_id, webpage) + return self._extract_from_webpage(row, anchor_id, lang) From 9378ae6e1d6165c2402890c53c76f7975fee6d7b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 13 Oct 2013 15:54:53 +0200 Subject: [PATCH 110/264] [youku] Allow shortcut youku:ID and make non-matching groups non-matching (#1571) --- youtube_dl/extractor/youku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 00fa2ccb5..9d88c17f5 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -13,7 +13,7 @@ from ..utils import ( class YoukuIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)' + _VALID_URL = r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)' _TEST = { u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", u"file": u"XNDgyMDQ2NTQw_part00.flv", From 1fb07d10a3f5f2baf1ebbdbc69d8ee8615cec2f9 Mon Sep 17 00:00:00 2001 From: Jai Grimshaw <jai@jaigrimshaw.com> Date: Mon, 14 Oct 2013 16:18:58 +1100 Subject: [PATCH 111/264] [youtube] Adds #1312 Download annotations Adds #1321 Download annotations from youtube Annotations are downloaded and written to a .annotations.xml file using the https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=$VIDEOID API. Added unit test for annotations. 
--- test/test_write_annotations.py | 82 +++++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 17 +++++++ youtube_dl/__init__.py | 4 ++ youtube_dl/extractor/youtube.py | 10 ++++ 4 files changed, 113 insertions(+) create mode 100644 test/test_write_annotations.py diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py new file mode 100644 index 000000000..ba7a9f50a --- /dev/null +++ b/test/test_write_annotations.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# coding: utf-8 + +import xml.etree.ElementTree +import os +import sys +import unittest + +# Allow direct execution +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import youtube_dl.YoutubeDL +import youtube_dl.extractor +from youtube_dl.utils import * +from .helper import try_rm + +PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") + +# General configuration (from __init__, not very elegant...) +jar = compat_cookiejar.CookieJar() +cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) +proxy_handler = compat_urllib_request.ProxyHandler() +opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) +compat_urllib_request.install_opener(opener) + +class YoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, *args, **kwargs): + super(YoutubeDL, self).__init__(*args, **kwargs) + self.to_stderr = self.to_screen + +with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + params = json.load(pf) +params['writeannotations'] = True +params['skip_download'] = True +params['writeinfojson'] = False +params['format'] = 'flv' + +TEST_ID = 'gr51aVj-mLg' +ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' +EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] + +class TestAnnotations(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + + def test_info_json(self): + expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have 
the same text. + ie = youtube_dl.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) + annoxml = None + with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: + annoxml = xml.etree.ElementTree.parse(annof) + self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') + root = annoxml.getroot() + self.assertEqual(root.tag, 'document') + annotationsTag = root.find('annotations') + self.assertEqual(annotationsTag.tag, 'annotations') + annotations = annotationsTag.findall('annotation') + + #Not all the annotations have TEXT children and the annotations are returned unsorted. + for a in annotations: + self.assertEqual(a.tag, 'annotation') + if a.get('type') == 'text': + textTag = a.find('TEXT') + text = textTag.text + self.assertTrue(text in expected) #assertIn only added in python 2.7 + #remove the first occurrence, there could be more than one annotation with the same text + expected.remove(text) + #We should have seen (and removed) all the expected annotation texts. + self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') + + + def tearDown(self): + try_rm(ANNOTATIONS_FILE) + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e85e03fa4..c8054544a 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -71,6 +71,7 @@ class YoutubeDL(object): logtostderr: Log messages to stderr instead of stdout. 
writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file + writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file @@ -258,6 +259,10 @@ class YoutubeDL(object): """ Report that the metadata file has been written """ self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) + def report_writeannotations(self, annofn): + """ Report that the annotations file has been written. """ + self.to_screen(u'[info] Writing video annotations to: ' + annofn) + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: @@ -522,6 +527,18 @@ class YoutubeDL(object): self.report_error(u'Cannot write description file ' + descfn) return + if self.params.get('writeannotations', False): + try: + annofn = filename + u'.annotations.xml' + self.report_writeannotations(annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) + except (KeyError, TypeError): + self.report_warning(u'There are no annotations to write.') + except (OSError, IOError): + self.report_error(u'Cannot write annotations file: ' + annofn) + return + subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3513d719f..fb1270ea2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -339,6 +339,9 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--write-info-json', action='store_true', dest='writeinfojson', help='write video metadata to a .info.json file', default=False) + filesystem.add_option('--write-annotations', + action='store_true', dest='writeannotations', + help='write video 
annotations to a .annotation file', default=False) filesystem.add_option('--write-thumbnail', action='store_true', dest='writethumbnail', help='write thumbnail image to disk', default=False) @@ -601,6 +604,7 @@ def _real_main(argv=None): 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, + 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'writethumbnail': opts.writethumbnail, 'writesubtitles': opts.writesubtitles, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8222a880f..4347651d7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1250,6 +1250,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url_map[itag] = format_url return url_map + def _extract_annotations(self, video_id): + url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id + return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') + def _real_extract(self, url): # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) @@ -1382,6 +1386,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) + # annotations + video_annotations = None + if self._downloader.params.get('writeannotations', False): + video_annotations = self._extract_annotations(video_id) + # Decide which formats to download try: @@ -1495,6 +1504,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'subtitles': video_subtitles, 'duration': video_duration, 'age_limit': 18 if age_gate else 0, + 'annotations': video_annotations }) return results From ea62a2da466e3fce802930d3685d53a159520719 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda 
<filippo.valsorda@gmail.com> Date: Mon, 14 Oct 2013 01:32:47 -0400 Subject: [PATCH 112/264] add VideoPremium.tv RTMP support --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videopremium.py | 40 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/videopremium.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d76945a48..748f12e5a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -131,6 +131,7 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE +from .videopremium import VideoPremiumIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE from .wat import WatIE diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py new file mode 100644 index 000000000..65f39b982 --- /dev/null +++ b/youtube_dl/extractor/videopremium.py @@ -0,0 +1,40 @@ +import re +import random + +from .common import InfoExtractor + + +class VideoPremiumIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?' 
+ _TEST = { + u'url': u'http://videopremium.tv/4w7oadjsf156', + u'file': u'4w7oadjsf156.f4v', + u'info_dict': { + u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4" + }, + u'params': { + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage_url = 'http://videopremium.tv/' + video_id + webpage = self._download_webpage(webpage_url, video_id) + + self.report_extraction(video_id) + + video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<', + webpage, u'video title') + + return [{ + 'id': video_id, + 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16), + 'play_path': "mp4:%s.f4v" % video_id, + 'page_url': "http://videopremium.tv/" + video_id, + 'player_url': "http://videopremium.tv/uplayer/uppod.swf", + 'ext': 'f4v', + 'title': video_title, + }] From f9b3d7af471909a449c3bf5977a7aaa6555a3495 Mon Sep 17 00:00:00 2001 From: Andras Elso <elso.andras@gmail.com> Date: Mon, 14 Oct 2013 13:07:47 +0200 Subject: [PATCH 113/264] Add an extractor for Szombathelyi TV --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sztvhu.py | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 youtube_dl/extractor/sztvhu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 748f12e5a..14ba6f358 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,6 +112,7 @@ from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE from .statigram import StatigramIE from .steam import SteamIE +from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE from .ted import TEDIE from .tf1 import TF1IE diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py new file mode 100644 index 000000000..486f93d26 --- /dev/null +++ b/youtube_dl/extractor/sztvhu.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +import re + +from 
.common import InfoExtractor +from ..utils import determine_ext + +class SztvHuIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/([^/]+)/(?P<name>.+)' + _TEST = { + u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909', + u'file': u'130909zoldnap.mp4', + u'md5': u'0047eacedc0afd1ceeac99e69173a07e', + u'info_dict': { + u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren", + u"description" : u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) +# file = self._search_regex(r'var fileHtml5 = "...:(.*?)";', + file = self._search_regex(r'file: "...:(.*?)",', + webpage, 'video file') + title = self._html_search_regex(r'<meta name="title" content="([^"]*)"', + webpage, 'video title').rsplit(' - ', 2)[0] + description = self._html_search_regex(r'<meta name="description" content="([^"]*)"/>', + webpage, 'video description') + thumbnail = self._og_search_thumbnail(webpage) + + video_url = 'http://media.sztv.hu/vod/' + file + + return {'id': name, + 'url' : video_url, + 'title': title, + 'ext': determine_ext(video_url), + 'description': description, + 'thumbnail': thumbnail, + } From c45aa560804e5be087b75c6e9fa8697020e57ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 14 Oct 2013 16:25:04 +0200 Subject: [PATCH 114/264] [gamespot] Fix video extraction (fixes #1587) --- youtube_dl/extractor/gamespot.py | 71 +++++++++++++++++--------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 
5edbf678a..098768361 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -1,56 +1,59 @@ import re -import xml.etree.ElementTree +import json from .common import InfoExtractor from ..utils import ( - unified_strdate, compat_urllib_parse, + compat_urlparse, + unescapeHTML, + get_meta_content, ) + class GameSpotIE(InfoExtractor): - _WORKING = False _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' _TEST = { u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", - u"file": u"6410818.mp4", + u"file": u"gs-2300-6410818.mp4", u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", u"info_dict": { u"title": u"Arma 3 - Community Guide: SITREP I", - u"upload_date": u"20130627", + u'description': u'Check out this video where some of the basics of Arma 3 is explained.', } } - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - page_id = mobj.group('page_id') + page_id = video_id = mobj.group('page_id') webpage = self._download_webpage(url, page_id) - video_id = self._html_search_regex([r'"og:video" content=".*?\?id=(\d+)"', - r'http://www\.gamespot\.com/videoembed/(\d+)'], - webpage, 'video id') - data = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'}) - info_url = 'http://www.gamespot.com/pages/video_player/xml.php?' 
+ data - info_xml = self._download_webpage(info_url, video_id) - doc = xml.etree.ElementTree.fromstring(info_xml) - clip_el = doc.find('./playList/clip') + data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video') + data_video = json.loads(unescapeHTML(data_video_json)) - http_urls = [{'url': node.find('filePath').text, - 'rate': int(node.find('rate').text)} - for node in clip_el.find('./httpURI')] - best_quality = sorted(http_urls, key=lambda f: f['rate'])[-1] - video_url = best_quality['url'] - title = clip_el.find('./title').text - ext = video_url.rpartition('.')[2] - thumbnail_url = clip_el.find('./screenGrabURI').text - view_count = int(clip_el.find('./views').text) - upload_date = unified_strdate(clip_el.find('./postDate').text) + # Transform the manifest url to a link to the mp4 files + # they are used in mobile devices. + f4m_url = data_video['videoStreams']['f4m_stream'] + f4m_path = compat_urlparse.urlparse(f4m_url).path + QUALITIES_RE = r'((,\d+)+,?)' + qualities = self._search_regex(QUALITIES_RE, f4m_path, u'qualities').strip(',').split(',') + http_path = f4m_path[1:].split('/', 1)[1] + http_template = re.sub(QUALITIES_RE, r'%s', http_path) + http_template = http_template.replace('.csmil/manifest.f4m', '') + http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template) + formats = [] + for q in qualities: + formats.append({ + 'url': http_template % q, + 'ext': 'mp4', + 'format_id': q, + }) - return [{ - 'id' : video_id, - 'url' : video_url, - 'ext' : ext, - 'title' : title, - 'thumbnail' : thumbnail_url, - 'upload_date' : upload_date, - 'view_count' : view_count, - }] + info = { + 'id': data_video['guid'], + 'title': compat_urllib_parse.unquote(data_video['title']), + 'formats': formats, + 'description': get_meta_content('description', webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } + # TODO: Remove when #980 has been merged + info.update(formats[-1]) + return info From 
9ed3bdc64d0310e568883b9e81e3dd5114efd7ed Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 01:20:04 +0200 Subject: [PATCH 115/264] [tudou] Add support for youku links (Closes #1571) --- youtube_dl/extractor/tudou.py | 36 ++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 1405b73f7..79679a14a 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -7,15 +7,25 @@ from .common import InfoExtractor class TudouIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' - _TEST = { + _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' + _TESTS = [{ u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', u'file': u'159448201.f4v', u'md5': u'140a49ed444bd22f93330985d8475fcb', u'info_dict': { u"title": u"卡马乔国足开大脚长传冲吊集锦" } - } + }, + { + u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html', + u'file': u'todo.mp4', + u'md5': u'todo.mp4', + u'info_dict': { + u'title': u'todo.mp4', + }, + u'add_ie': [u'Youku'], + u'skip': u'Only works from China' + }] def _url_for_id(self, id, quality = None): info_url = "http://v2.tudou.com/f?id="+str(id) @@ -29,14 +39,18 @@ class TudouIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group(2) webpage = self._download_webpage(url, video_id) - title = re.search(",kw:\"(.+)\"",webpage) - if title is None: - title = re.search(",kw: \'(.+)\'",webpage) - title = title.group(1) - thumbnail_url = re.search(",pic: \'(.+?)\'",webpage) - if thumbnail_url is None: - thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) - thumbnail_url = thumbnail_url.group(1) + + m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage) + if m and m.group(1): + return { + '_type': 'url', + 
'url': u'youku:' + m.group(1), + 'ie_key': 'Youku' + } + + title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title') + thumbnail_url = self._search_regex( + r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False) segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments') segments = json.loads(segs_json) From 7cf67fbe29684f9681aa591a6eaeb43a5c6b5cb2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 01:33:20 +0200 Subject: [PATCH 116/264] [sztvhu] Simplify --- youtube_dl/extractor/sztvhu.py | 45 ++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py index 486f93d26..cd3e203e6 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -5,37 +5,40 @@ import re from .common import InfoExtractor from ..utils import determine_ext + class SztvHuIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/([^/]+)/(?P<name>.+)' + _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)' _TEST = { u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909', - u'file': u'130909zoldnap.mp4', - u'md5': u'0047eacedc0afd1ceeac99e69173a07e', + u'file': u'20130909.mp4', + u'md5': u'a6df607b11fb07d0e9f2ad94613375cb', u'info_dict': { u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren", - u"description" : u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...', + u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. 
A PET...', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - webpage = self._download_webpage(url, name) -# file = self._search_regex(r'var fileHtml5 = "...:(.*?)";', - file = self._search_regex(r'file: "...:(.*?)",', - webpage, 'video file') - title = self._html_search_regex(r'<meta name="title" content="([^"]*)"', - webpage, 'video title').rsplit(' - ', 2)[0] - description = self._html_search_regex(r'<meta name="description" content="([^"]*)"/>', - webpage, 'video description') + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + video_file = self._search_regex( + r'file: "...:(.*?)",', webpage, 'video file') + title = self._html_search_regex( + r'<meta name="title" content="([^"]*) - [^-]*"', + webpage, 'video title') + description = self._html_search_regex( + r'<meta name="description" content="([^"]*)"/>', + webpage, 'video description', fatal=False) thumbnail = self._og_search_thumbnail(webpage) - video_url = 'http://media.sztv.hu/vod/' + file + video_url = 'http://media.sztv.hu/vod/' + video_file - return {'id': name, - 'url' : video_url, - 'title': title, - 'ext': determine_ext(video_url), - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': determine_ext(video_url), + 'description': description, + 'thumbnail': thumbnail, + } From a623df4c7b099bc3adfe943c7155e55c6512aeff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 01:34:47 +0200 Subject: [PATCH 117/264] Credit @Elbandi for sztvhu --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index fb1270ea2..f79b7796c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -31,6 +31,7 @@ __authors__ = ( 'Huarong Huo', 'Ismael Mejía', 'Steffan \'Ruirize\' James', + 'Andras Elso', ) __license__ = 'Public Domain' From 
44a5f1718a5657a08082d8fd3201403bf2683c4f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 02:00:53 +0200 Subject: [PATCH 118/264] Simplify tests * Make them directly executable again * Move common stuff (md5, parameters) to helper * Never import * * General clean up --- test/helper.py | 28 ++++++++++++++-------- test/test_age_restriction.py | 12 ++++++---- test/test_all_urls.py | 18 +++++++++----- test/test_dailymotion_subtitles.py | 16 ++++++------- test/test_download.py | 38 ++++++++++++++---------------- test/test_playlists.py | 13 +++++----- test/test_utils.py | 14 +++++------ test/test_write_annotations.py | 37 ++++++++++++++--------------- test/test_write_info_json.py | 32 ++++++++++++------------- test/test_youtube_lists.py | 24 ++++++++++++------- test/test_youtube_signature.py | 16 ++++++++----- test/test_youtube_subtitles.py | 29 ++++++++++++++++------- 12 files changed, 154 insertions(+), 123 deletions(-) diff --git a/test/helper.py b/test/helper.py index ad1b74dd3..79a0ede48 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,22 +1,27 @@ import errno import io +import hashlib import json import os.path import re import types import youtube_dl.extractor -from youtube_dl import YoutubeDL, YoutubeDLHandler -from youtube_dl.utils import ( - compat_cookiejar, - compat_urllib_request, -) +from youtube_dl import YoutubeDL -youtube_dl._setup_opener(timeout=10) -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) +def global_setup(): + youtube_dl._setup_opener(timeout=10) + + +def get_params(override=None): + PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "parameters.json") + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + if override: + parameters.update(override) + return parameters def try_rm(filename): @@ -32,7 +37,7 @@ 
class FakeYDL(YoutubeDL): def __init__(self): # Different instances of the downloader can't share the same dictionary # some test set the "sublang" parameter, which would break the md5 checks. - params = dict(parameters) + params = get_params() super(FakeYDL, self).__init__(params) self.result = [] @@ -62,3 +67,6 @@ def get_testcases(): for t in getattr(ie, '_TESTS', []): t['name'] = type(ie).__name__[:-len('IE')] yield t + + +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index ec3e30572..d500c6edc 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -1,14 +1,16 @@ #!/usr/bin/env python -import sys -import unittest - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import global_setup, try_rm +global_setup() + from youtube_dl import YoutubeDL -from .helper import try_rm def _download_restricted(url, filename, age): diff --git a/test/test_all_urls.py b/test/test_all_urls.py index b28ad000b..56e5f80e1 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -1,14 +1,20 @@ #!/usr/bin/env python -import sys -import unittest - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import get_testcases + +from youtube_dl.extractor import ( + gen_extractors, + JustinTVIE, + YoutubeIE, +) -from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors -from .helper import get_testcases class TestAllURLsMatching(unittest.TestCase): def setUp(self): diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py 
index e655d280d..c596415c4 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -1,18 +1,16 @@ #!/usr/bin/env python -import sys -import unittest -import hashlib - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, global_setup, md5 +global_setup() + from youtube_dl.extractor import DailymotionIE -from youtube_dl.utils import * -from .helper import FakeYDL - -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class TestDailymotionSubtitles(unittest.TestCase): def setUp(self): diff --git a/test/test_download.py b/test/test_download.py index 68da4d984..b9a9be11d 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,26 +1,31 @@ #!/usr/bin/env python +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_params, get_testcases, global_setup, try_rm, md5 +global_setup() + + import hashlib import io -import os import json -import unittest -import sys import socket -import binascii - -# Allow direct execution -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import youtube_dl.YoutubeDL -from youtube_dl.utils import * - -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") +from youtube_dl.utils import ( + compat_str, + compat_urllib_error, + DownloadError, + ExtractorError, + UnavailableVideoError, +) RETRIES = 3 -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() - class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): self.to_stderr = self.to_screen @@ -37,18 +42,12 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() -import test.helper as helper # Set 
up remaining global configuration -from .helper import get_testcases, try_rm defs = get_testcases() -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - class TestDownload(unittest.TestCase): maxDiff = None def setUp(self): - self.parameters = parameters self.defs = defs ### Dynamically generate tests @@ -68,8 +67,7 @@ def generator(test_case): print_skipping(test_case['skip']) return - params = self.parameters.copy() - params.update(test_case.get('params', {})) + params = get_params(test_case.get('params', {})) ydl = YoutubeDL(params) ydl.add_default_info_extractors() diff --git a/test/test_playlists.py b/test/test_playlists.py index 108a4d63b..d6a8d56df 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -1,13 +1,16 @@ #!/usr/bin/env python # encoding: utf-8 -import sys -import unittest -import json # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, global_setup +global_setup() + from youtube_dl.extractor import ( DailymotionPlaylistIE, @@ -18,9 +21,7 @@ from youtube_dl.extractor import ( LivestreamIE, NHLVideocenterIE, ) -from youtube_dl.utils import * -from .helper import FakeYDL class TestPlaylists(unittest.TestCase): def assertIsPlaylist(self, info): diff --git a/test/test_utils.py b/test/test_utils.py index f2c03d421..270669044 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,14 +1,14 @@ #!/usr/bin/env python -# Various small unit tests - -import sys -import unittest -import xml.etree.ElementTree - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +# Various small unit tests +import xml.etree.ElementTree #from 
youtube_dl.utils import htmlentity_transform from youtube_dl.utils import ( diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index ba7a9f50a..6f08808cd 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -1,39 +1,38 @@ #!/usr/bin/env python # coding: utf-8 -import xml.etree.ElementTree +# Allow direct execution import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import get_params, global_setup, try_rm +global_setup() + + +import io + +import xml.etree.ElementTree import youtube_dl.YoutubeDL import youtube_dl.extractor -from youtube_dl.utils import * -from .helper import try_rm +from youtube_dl.utils import True -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") - -# General configuration (from __init__, not very elegant...) 
-jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - params = json.load(pf) -params['writeannotations'] = True -params['skip_download'] = True -params['writeinfojson'] = False -params['format'] = 'flv' +params = get_params({ + 'writeannotations': True, + 'skip_download': True, + 'writeinfojson': False, + 'format': 'flv', +}) + + TEST_ID = 'gr51aVj-mLg' ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py index de6d5180f..a5b6f6972 100644 --- a/test/test_write_info_json.py +++ b/test/test_write_info_json.py @@ -1,37 +1,34 @@ #!/usr/bin/env python # coding: utf-8 -import json +# Allow direct execution import os import sys import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Allow direct execution -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import get_params, global_setup +global_setup() + + +import io +import json import youtube_dl.YoutubeDL import youtube_dl.extractor -from youtube_dl.utils import * -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") - -# General configuration (from __init__, not very elegant...) 
-jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) class YoutubeDL(youtube_dl.YoutubeDL): def __init__(self, *args, **kwargs): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - params = json.load(pf) -params['writeinfojson'] = True -params['skip_download'] = True -params['writedescription'] = True +params = get_params({ + 'writeinfojson': True, + 'skip_download': True, + 'writedescription': True, +}) + TEST_ID = 'BaW_jenozKc' INFO_JSON_FILE = TEST_ID + '.mp4.info.json' @@ -42,6 +39,7 @@ This is a test video for youtube-dl. For more information, contact phihag@phihag.de .''' + class TestInfoJSON(unittest.TestCase): def setUp(self): # Clear old files diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 0b5c79030..c1753b5bb 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -1,20 +1,26 @@ #!/usr/bin/env python -import sys -import unittest -import json - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE -from youtube_dl.utils import * +from test.helper import FakeYDL, global_setup +global_setup() + + +from youtube_dl.extractor import ( + YoutubeUserIE, + YoutubePlaylistIE, + YoutubeIE, + YoutubeChannelIE, + YoutubeShowIE, +) -from .helper import FakeYDL class TestYoutubeLists(unittest.TestCase): - def assertIsPlaylist(self,info): + def assertIsPlaylist(self, info): """Make sure the info has '_type' set to 
'playlist'""" self.assertEqual(info['_type'], 'playlist') diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 5007d9a16..5e1ff5eb0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -1,14 +1,18 @@ #!/usr/bin/env python +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import global_setup +global_setup() + + import io import re import string -import sys -import unittest - -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeIE from youtube_dl.utils import compat_str, compat_urlretrieve diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 07850385e..00430a338 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -1,69 +1,79 @@ #!/usr/bin/env python -import sys -import unittest -import hashlib - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, global_setup, md5 +global_setup() + from youtube_dl.extractor import YoutubeIE -from youtube_dl.utils import * -from .helper import FakeYDL -md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class TestYoutubeSubtitles(unittest.TestCase): def setUp(self): self.DL = FakeYDL() self.url = 'QRS8MkLhQmM' + def getInfoDict(self): IE = YoutubeIE(self.DL) info_dict = IE.extract(self.url) return info_dict + def getSubtitles(self): info_dict = self.getInfoDict() - return info_dict[0]['subtitles'] + return info_dict[0]['subtitles'] + def test_youtube_no_writesubtitles(self): self.DL.params['writesubtitles'] = False subtitles = self.getSubtitles() self.assertEqual(subtitles, None) + def 
test_youtube_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') + def test_youtube_subtitles_lang(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') + def test_youtube_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) + def test_youtube_subtitles_sbv_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'sbv' subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b') + def test_youtube_subtitles_vtt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'vtt' subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') + def test_youtube_list_subtitles(self): self.DL.expect_warning(u'Video doesn\'t have automatic captions') self.DL.params['listsubtitles'] = True info_dict = self.getInfoDict() self.assertEqual(info_dict, None) + def test_youtube_automatic_captions(self): self.url = '8YoUxe5ncPo' self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() self.assertTrue(subtitles['it'] is not None) + def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') self.url = 'sAjKT8FhjI8' @@ -71,6 +81,7 @@ class TestYoutubeSubtitles(unittest.TestCase): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) + def test_youtube_multiple_langs(self): self.url = 'QRS8MkLhQmM' self.DL.params['writesubtitles'] = True From a4fd04158eb7e570a0b2d27f6d9b6b9360644807 Mon Sep 17 00:00:00 2001 From: Philipp 
Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 02:07:26 +0200 Subject: [PATCH 119/264] Do not import * --- youtube_dl/PostProcessor.py | 10 ++++++++-- youtube_dl/__init__.py | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 039e01498..13b56ede5 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -2,9 +2,15 @@ import os import subprocess import sys import time -import datetime -from .utils import * + +from .utils import ( + compat_subprocess_get_DEVNULL, + encodeFilename, + PostProcessingError, + shell_quote, + subtitles_filename, +) class PostProcessor(object): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f79b7796c..5248a92c7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -47,17 +47,43 @@ import shlex import socket import subprocess import sys -import warnings +import traceback import platform -from .utils import * +from .utils import ( + compat_cookiejar, + compat_print, + compat_str, + compat_urllib_request, + DateRange, + decodeOption, + determine_ext, + DownloadError, + get_cachedir, + make_HTTPS_handler, + MaxDownloadsReached, + platform_name, + preferredencoding, + SameFileError, + std_headers, + write_string, + YoutubeDLHandler, +) from .update import update_self from .version import __version__ -from .FileDownloader import * +from .FileDownloader import ( + FileDownloader, +) from .extractor import gen_extractors from .YoutubeDL import YoutubeDL -from .PostProcessor import * +from .PostProcessor import ( + FFmpegMetadataPP, + FFmpegVideoConvertor, + FFmpegExtractAudioPP, + FFmpegEmbedSubtitlePP, +) + def parseOpts(overrideArguments=None): def _readOptions(filename_bytes): @@ -689,7 +715,7 @@ def _real_main(argv=None): if opts.cookiefile is not None: try: jar.save() - except (IOError, OSError) as err: + except (IOError, OSError): sys.exit(u'ERROR: unable to save 
cookie jar') sys.exit(retcode) From cd054fc491198a5a7c69d76f19693b1cd4d5c086 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 04:53:02 +0200 Subject: [PATCH 120/264] Use upper-case for prefixes in help to signify bytes (#1043) --- youtube_dl/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 472ae9c0c..3efa5dfd1 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -267,11 +267,11 @@ def parseOpts(overrideArguments=None): help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50k or 44.6m)') + dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') downloader.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) downloader.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024") downloader.add_option('--no-resize-buffer', action='store_true', dest='noresizebuffer', help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) From 8381a92120c3826b471e6d2cc38045b5f3a9d15e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 15 Oct 2013 08:12:30 +0200 Subject: [PATCH 121/264] [websurg] Skipt the test It needs login information. 
--- youtube_dl/extractor/websurg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 7d335d444..43953bfdd 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -18,7 +18,8 @@ class WeBSurgIE(InfoExtractor): u'file': u'vd01en4012.mp4', u'params': { u'skip_download': True, - } + }, + u'skip': u'Requires login information', } _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' From e772692ffd727631e65be90948b7e8c422738a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 15 Oct 2013 08:22:20 +0200 Subject: [PATCH 122/264] Fix an import in the tests and the Youtube Shows test --- test/test_write_annotations.py | 1 - test/test_youtube_lists.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index 6f08808cd..35defb895 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -17,7 +17,6 @@ import xml.etree.ElementTree import youtube_dl.YoutubeDL import youtube_dl.extractor -from youtube_dl.utils import True class YoutubeDL(youtube_dl.YoutubeDL): diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index c1753b5bb..4b7a7847b 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -106,7 +106,7 @@ class TestYoutubeLists(unittest.TestCase): dl = FakeYDL() ie = YoutubeShowIE(dl) result = ie.extract('http://www.youtube.com/show/airdisasters') - self.assertTrue(len(result) >= 4) + self.assertTrue(len(result) >= 3) if __name__ == '__main__': unittest.main() From 9d74e308f7caa7f649809366ebcdb5a7caf560b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 15 Oct 2013 08:22:59 +0200 Subject: [PATCH 123/264] [sztvhu] Fix the title 
extraction --- youtube_dl/extractor/sztvhu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py index cd3e203e6..81fa35c4b 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -25,7 +25,7 @@ class SztvHuIE(InfoExtractor): video_file = self._search_regex( r'file: "...:(.*?)",', webpage, 'video file') title = self._html_search_regex( - r'<meta name="title" content="([^"]*) - [^-]*"', + r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"', webpage, 'video title') description = self._html_search_regex( r'<meta name="description" content="([^"]*)"/>', From 9d4660cab15f374176f87d3f747a559142e4af9b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 12:05:13 +0200 Subject: [PATCH 124/264] [generic] Support embedded vimeo videos (#1602) --- test/test_utils.py | 16 ++++++++++++++++ youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ youtube_dl/extractor/vimeo.py | 11 +++++++++-- youtube_dl/utils.py | 17 +++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 270669044..f3fbff042 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 # Allow direct execution import os @@ -21,6 +22,8 @@ from youtube_dl.utils import ( find_xpath_attr, get_meta_content, xpath_with_ns, + smuggle_url, + unsmuggle_url, ) if sys.version_info < (3, 0): @@ -155,5 +158,18 @@ class TestUtil(unittest.TestCase): self.assertEqual(find('media:song/media:author').text, u'The Author') self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') + def test_smuggle_url(self): + data = {u"ö": u"ö", u"abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, 
unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d48c84f8d..89805250c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,8 @@ from ..utils import ( compat_urlparse, ExtractorError, + smuggle_url, + unescapeHTML, ) from .brightcove import BrightcoveIE @@ -29,6 +31,17 @@ class GenericIE(InfoExtractor): u"title": u"R\u00e9gis plante sa Jeep" } }, + # embedded vimeo video + { + u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', + u'file': u'22444065.mp4', + u'md5': u'2903896e23df39722c33f015af0666e2', + u'info_dict': { + u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011', + u"uploader_id": u"skillsmatter", + u"uploader": u"Skills Matter", + } + } ] def report_download_webpage(self, video_id): @@ -127,6 +140,14 @@ class GenericIE(InfoExtractor): bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) return self.url_result(bc_url, 'Brightcove') + # Look for embedded Vimeo player + mobj = re.search( + r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage) + if mobj: + player_url = unescapeHTML(mobj.group(1)) + surl = smuggle_url(player_url, {'Referer': url}) + return self.url_result(surl, 'Vimeo') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cea29f035..2de56ac81 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -11,6 +11,7 @@ from ..utils import ( get_element_by_attribute, ExtractorError, std_headers, + unsmuggle_url, ) class VimeoIE(InfoExtractor): @@ -53,7 +54,7 @@ class 
VimeoIE(InfoExtractor): u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', u'uploader': u'The BLN & Business of Software', }, - }, + } ] def _login(self): @@ -98,6 +99,12 @@ class VimeoIE(InfoExtractor): self._login() def _real_extract(self, url, new_video=True): + url, data = unsmuggle_url(url) + headers = std_headers + if data is not None: + headers = headers.copy() + headers.update(data) + # Extract ID from URL mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -112,7 +119,7 @@ class VimeoIE(InfoExtractor): url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information - request = compat_urllib_request.Request(url, None, std_headers) + request = compat_urllib_request.Request(url, None, headers) webpage = self._download_webpage(request, video_id) # Now we begin extracting as much information as we can from what we diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3e81c308b..833f981f2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -945,3 +945,20 @@ class locked_file(object): def shell_quote(args): return ' '.join(map(pipes.quote, args)) + + +def smuggle_url(url, data): + """ Pass additional data in a URL for internal use. 
""" + + sdata = compat_urllib_parse.urlencode( + {u'__youtubedl_smuggle': json.dumps(data)}) + return url + u'#' + sdata + + +def unsmuggle_url(smug_url): + if not '#__youtubedl_smuggle' in smug_url: + return smug_url, None + url, _, sdata = smug_url.rpartition(u'#') + jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] + data = json.loads(jsond) + return url, data From 8abbf43f21d2afcfa2db1744a3f6ccfc917cc8d7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 15 Oct 2013 12:06:45 +0200 Subject: [PATCH 125/264] release 2013.10.15 --- README.md | 7 +++++-- youtube_dl/version.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8824daee2..6dae0a580 100644 --- a/README.md +++ b/README.md @@ -57,9 +57,10 @@ which means you can modify it, redistribute it or use it however you like. file. Record all downloaded videos in it. ## Download Options: - -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) + -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. + 50K or 4.2M) -R, --retries RETRIES number of retries (default is 10) - --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized @@ -100,6 +101,7 @@ which means you can modify it, redistribute it or use it however you like. file modification time --write-description write video description to a .description file --write-info-json write video metadata to a .info.json file + --write-annotations write video annotations to a .annotation file --write-thumbnail write thumbnail image to disk ## Verbosity / Simulation Options: @@ -166,6 +168,7 @@ which means you can modify it, redistribute it or use it however you like. 
processed files are overwritten by default --embed-subs embed subtitles in the video (only for mp4 videos) + --add-metadata add metadata to the files # CONFIGURATION diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1004af116..97dc5e9cc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.09' +__version__ = '2013.10.15' From 996d1c3242be5569bb4b579b2e3ad25a6d928dfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 15 Oct 2013 23:08:52 +0200 Subject: [PATCH 126/264] Don't include the test/testdata directory in the youtube-dl.tar.gz The last releases included big files that increased the size of the compressed file. --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 85dacfa4c..abd89be49 100644 --- a/Makefile +++ b/Makefile @@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*~' \ --exclude '__pycache' \ --exclude '.git' \ + --exclude 'testdata' \ -- \ bin devscripts test youtube_dl \ CHANGELOG LICENSE README.md README.txt \ From 76965512daae80b7f1e43f063308ff93d6dfbc8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 15 Oct 2013 23:15:15 +0200 Subject: [PATCH 127/264] Fix the indentation of the Makefile It uses tabs, no spaces. 
--- Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index abd89be49..c6d09932b 100644 --- a/Makefile +++ b/Makefile @@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local ifeq ($(PREFIX),/usr) - SYSCONFDIR=/etc + SYSCONFDIR=/etc else - ifeq ($(PREFIX),/usr/local) - SYSCONFDIR=/etc - else - SYSCONFDIR=$(PREFIX)/etc - endif + ifeq ($(PREFIX),/usr/local) + SYSCONFDIR=/etc + else + SYSCONFDIR=$(PREFIX)/etc + endif endif install: youtube-dl youtube-dl.1 youtube-dl.bash-completion @@ -71,7 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '*~' \ --exclude '__pycache' \ --exclude '.git' \ - --exclude 'testdata' \ + --exclude 'testdata' \ -- \ bin devscripts test youtube_dl \ CHANGELOG LICENSE README.md README.txt \ From bfd14b1b2fdf1f0e54e639f9695f73edf578e241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 16 Oct 2013 16:57:40 +0200 Subject: [PATCH 128/264] Add an extractor for rutube.ru (closes #1136) It downloads with a m3u8 manifest, requires ffmpeg. 
--- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rutube.py | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/rutube.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5f0e2ec9b..4f20fbd1a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -102,6 +102,7 @@ from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE +from .rutube import RutubeIE from .sina import SinaIE from .slashdot import SlashdotIE from .slideshare import SlideshareIE diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py new file mode 100644 index 000000000..a18034fe2 --- /dev/null +++ b/youtube_dl/extractor/rutube.py @@ -0,0 +1,58 @@ +# encoding: utf-8 +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + compat_str, + ExtractorError, +) + + +class RutubeIE(InfoExtractor): + _VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)' + + _TEST = { + u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4', + u'info_dict': { + u'title': u'Раненный кенгуру забежал в аптеку', + u'uploader': u'NTDRussian', + u'uploader_id': u'29790', + }, + u'params': { + # It requires ffmpeg (m3u8 download) + u'skip_download': True, + }, + } + + def _get_api_response(self, short_id, subpath): + api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id) + response_json = self._download_webpage(api_url, short_id, + u'Downloading %s json' % subpath) + return json.loads(response_json) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + long_id = mobj.group('long_id') + webpage = self._download_webpage(url, long_id) + og_video = self._og_search_video_url(webpage) + short_id = compat_urlparse.urlparse(og_video).path[1:] + 
options = self._get_api_response(short_id, 'options') + trackinfo = self._get_api_response(short_id, 'trackinfo') + # Some videos don't have the author field + author = trackinfo.get('author') or {} + m3u8_url = trackinfo['video_balancer'].get('m3u8') + if m3u8_url is None: + raise ExtractorError(u'Couldn\'t find m3u8 manifest url') + + return { + 'id': trackinfo['id'], + 'title': trackinfo['title'], + 'url': m3u8_url, + 'ext': 'mp4', + 'thumbnail': options['thumbnail_url'], + 'uploader': author.get('name'), + 'uploader_id': compat_str(author['id']) if author else None, + } From 2d0efe70a684cf378c6c325eafc8e52a85321157 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 17 Oct 2013 00:46:11 +0200 Subject: [PATCH 129/264] [brightcove] Fix more broken XML (#1608) --- youtube_dl/extractor/brightcove.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 745212f2f..58f3d9708 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -53,6 +53,8 @@ class BrightcoveIE(InfoExtractor): # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', lambda m: m.group(1) + '/>', object_str) + # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 + object_str = object_str.replace(u'<--', u'<!--') object_doc = xml.etree.ElementTree.fromstring(object_str) assert u'BrightcoveExperience' in object_doc.attrib['class'] From 591454798d330adfcf8e22ef66fed7bbdf9f628b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 17 Oct 2013 01:02:17 +0200 Subject: [PATCH 130/264] [brightcove] Raise error if playlist is empty (#1608) --- youtube_dl/extractor/brightcove.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 
58f3d9708..1392f382a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -98,7 +98,10 @@ class BrightcoveIE(InfoExtractor): playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, player_key, u'Downloading playlist information') - playlist_info = json.loads(playlist_info)['videoList'] + json_data = json.loads(playlist_info) + if 'videoList' not in json_data: + raise ExtractorError(u'Empty playlist') + playlist_info = json_data['videoList'] videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] return self.playlist_result(videos, playlist_id=playlist_info['id'], From a733eb6c534625b51e42763d8c4b8f29e176e512 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 17 Oct 2013 02:19:19 +0200 Subject: [PATCH 131/264] [youtube] Do not crash if caption info is missing altogether (Fixes #1610) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4347651d7..fb7c42830 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1150,7 +1150,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): list_page = self._download_webpage(list_url, video_id) caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8')) original_lang_node = caption_list.find('track') - if original_lang_node.attrib.get('kind') != 'asr' : + if not original_lang_node or original_lang_node.attrib.get('kind') != 'asr' : self._downloader.report_warning(u'Video doesn\'t have automatic captions') return {} original_lang = original_lang_node.attrib['lang_code'] From 54ed626cf8fa68b76a6ae21f659d84482ab319df Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 17 Oct 2013 02:20:26 +0200 Subject: [PATCH 132/264] release 2013.10.17 --- youtube_dl/version.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 97dc5e9cc..22a51ffe6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.15' +__version__ = '2013.10.17' From d21ab292008b114ea9e99edc2e9f2adde49415ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 17 Oct 2013 08:20:58 +0200 Subject: [PATCH 133/264] Add an extractor for techtalks.tv (closes #1606) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/techtalks.py | 65 +++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 youtube_dl/extractor/techtalks.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4f20fbd1a..db69af361 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -115,6 +115,7 @@ from .statigram import StatigramIE from .steam import SteamIE from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE +from .techtalks import TechTalksIE from .ted import TEDIE from .tf1 import TF1IE from .thisav import ThisAVIE diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py new file mode 100644 index 000000000..a55f236cb --- /dev/null +++ b/youtube_dl/extractor/techtalks.py @@ -0,0 +1,65 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + get_element_by_attribute, + clean_html, +) + + +class TechTalksIE(InfoExtractor): + _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' + + _TEST = { + u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', + u'playlist': [ + { + u'file': u'57758.flv', + u'info_dict': { + u'title': u'Learning Topic Models --- Going beyond SVD', + }, + }, + { + u'file': u'57758-slides.flv', + u'info_dict': { + u'title': u'Learning Topic Models --- Going beyond SVD', + }, + }, + ], + u'params': { + # 
rtmp download + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + talk_id = mobj.group('id') + webpage = self._download_webpage(url, talk_id) + rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage, + u'rtmp url') + play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', + webpage, u'presenter play path') + title = clean_html(get_element_by_attribute('class', 'title', webpage)) + video_info = { + 'id': talk_id, + 'title': title, + 'url': rtmp_url, + 'play_path': play_path, + 'ext': 'flv', + } + m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) + if m_slides is None: + return video_info + else: + return [ + video_info, + # The slides video + { + 'id': talk_id + '-slides', + 'title': title, + 'url': rtmp_url, + 'play_path': m_slides.group(1), + 'ext': 'flv', + }, + ] From 5d254f776a4d306961b6d22d8f7615844ef64390 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 00:27:51 +0200 Subject: [PATCH 134/264] Fix test --- test/test_YoutubeDL.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2b9fb92ee..ee210ed23 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1,27 +1,32 @@ #!/usr/bin/env python -import sys -import unittest - # Allow direct execution import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL -from helper import FakeYDL, parameters class YDL(FakeYDL): def __init__(self): super(YDL, self).__init__() self.downloaded_info_dicts = [] + def process_info(self, info_dict): self.downloaded_info_dicts.append(info_dict) + class TestFormatSelection(unittest.TestCase): def test_prefer_free_formats(self): # Same resolution => 
download webm ydl = YDL() ydl.params['prefer_free_formats'] = True - formats = [{u'ext': u'webm', u'height': 460},{u'ext': u'mp4', u'height': 460}] + formats = [ + {u'ext': u'webm', u'height': 460}, + {u'ext': u'mp4', u'height': 460}, + ] info_dict = {u'formats': formats, u'extractor': u'test'} ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] @@ -30,7 +35,10 @@ class TestFormatSelection(unittest.TestCase): # Different resolution => download best quality (mp4) ydl = YDL() ydl.params['prefer_free_formats'] = True - formats = [{u'ext': u'webm', u'height': 720},{u'ext': u'mp4',u'height': 1080}] + formats = [ + {u'ext': u'webm', u'height': 720}, + {u'ext': u'mp4', u'height': 1080}, + ] info_dict[u'formats'] = formats ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] @@ -39,7 +47,10 @@ class TestFormatSelection(unittest.TestCase): # No prefer_free_formats => keep original formats order ydl = YDL() ydl.params['prefer_free_formats'] = False - formats = [{u'ext': u'webm', u'height': 720},{u'ext': u'flv',u'height': 720}] + formats = [ + {u'ext': u'webm', u'height': 720}, + {u'ext': u'flv', u'height': 720}, + ] info_dict[u'formats'] = formats ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] From f4d96df0f1e978d580197fafb8dacade4b611ef3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 00:46:35 +0200 Subject: [PATCH 135/264] Extend #980 with --max-quality support --- test/helper.py | 4 ++-- test/test_YoutubeDL.py | 43 +++++++++++++++++++++++++++++++++++++++-- youtube_dl/YoutubeDL.py | 4 +++- youtube_dl/utils.py | 9 +++++++++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/test/helper.py b/test/helper.py index 79a0ede48..777119ea5 100644 --- a/test/helper.py +++ b/test/helper.py @@ -34,10 +34,10 @@ def try_rm(filename): class FakeYDL(YoutubeDL): - def __init__(self): + def __init__(self, override=None): # Different instances of the downloader 
can't share the same dictionary # some test set the "sublang" parameter, which would break the md5 checks. - params = get_params() + params = get_params(override=override) super(FakeYDL, self).__init__(params) self.result = [] diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index ee210ed23..ba6dc05bc 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -10,13 +10,17 @@ from test.helper import FakeYDL class YDL(FakeYDL): - def __init__(self): - super(YDL, self).__init__() + def __init__(self, *args, **kwargs): + super(YDL, self).__init__(*args, **kwargs) self.downloaded_info_dicts = [] + self.msgs = [] def process_info(self, info_dict): self.downloaded_info_dicts.append(info_dict) + def to_screen(self, msg): + self.msgs.append(msg) + class TestFormatSelection(unittest.TestCase): def test_prefer_free_formats(self): @@ -56,5 +60,40 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded[u'ext'], u'flv') + def test_format_limit(self): + formats = [ + {u'format_id': u'meh'}, + {u'format_id': u'good'}, + {u'format_id': u'great'}, + {u'format_id': u'excellent'}, + ] + info_dict = { + u'formats': formats, u'extractor': u'test', 'id': 'testvid'} + + ydl = YDL() + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'format_id'], u'excellent') + + ydl = YDL({'format_limit': 'good'}) + assert ydl.params['format_limit'] == 'good' + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'format_id'], u'good') + + ydl = YDL({'format_limit': 'great', 'format': 'all'}) + ydl.process_ie_result(info_dict) + self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh') + self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good') + self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great') + self.assertTrue('3' in ydl.msgs[0]) + + ydl = YDL() + 
ydl.params['format_limit'] = 'excellent' + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'format_id'], u'excellent') + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f22a8bd0e..fd98321f1 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -488,7 +488,9 @@ class YoutubeDL(object): format_limit = self.params.get('format_limit', None) if format_limit: - formats = [f for f in formats if f['format_id'] <= format_limit] + formats = list(takewhile_inclusive( + lambda f: f['format_id'] != format_limit, formats + )) if self.params.get('prefer_free_formats'): def _free_formats_key(f): try: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 833f981f2..bfb8f6bcd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -947,6 +947,15 @@ def shell_quote(args): return ' '.join(map(pipes.quote, args)) +def takewhile_inclusive(pred, seq): + """ Like itertools.takewhile, but include the latest evaluated element + (the first element so that Not pred(e)) """ + for e in seq: + yield e + if not pred(e): + return + + def smuggle_url(url, data): """ Pass additional data in a URL for internal use. 
""" From 416a5efce7f70a6a3ef88ac902100fa5211e181e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 00:49:45 +0200 Subject: [PATCH 136/264] fix typos --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/common.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fd98321f1..296c0f992 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -512,7 +512,7 @@ class YoutubeDL(object): formats_to_download = formats else: # We can accept formats requestd in the format: 34/10/5, we pick - # the first that is availble, starting from left + # the first that is available, starting from left req_formats = req_format.split('/') for rf in req_formats: matches = filter(lambda f:f['format_id'] == rf ,formats) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2a5a85dc6..d4af3b5eb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -365,7 +365,7 @@ class SearchInfoExtractor(InfoExtractor): def _get_n_results(self, query, n): """Get a specified number of results for a query""" - raise NotImplementedError("This method must be implemented by sublclasses") + raise NotImplementedError("This method must be implemented by subclasses") @property def SEARCH_KEY(self): From 7c58ef3275a2463728528b395fe584815aa6b16a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 11:16:11 +0200 Subject: [PATCH 137/264] [tudou] Fix title regex (Fixes #1614) --- youtube_dl/extractor/tudou.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 79679a14a..7a3891b89 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -48,7 +48,8 @@ class TudouIE(InfoExtractor): 'ie_key': 'Youku' } - title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title') + title = 
self._search_regex( + r",kw:\s*['\"](.+?)[\"']", webpage, u'title') thumbnail_url = self._search_regex( r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False) From 8e55e9abfc4aec5369161e05789b3eacf1865246 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 11:17:21 +0200 Subject: [PATCH 138/264] release 2013.10.18 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 22a51ffe6..ddef5fa50 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.17' +__version__ = '2013.10.18' From 53c1d3ef4992c7682e56819e0a4079d7bbd9d44a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 11:44:57 +0200 Subject: [PATCH 139/264] Check for embedded YouTube player (Fixes #1616) --- youtube_dl/extractor/generic.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 89805250c..69e0a7bd2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -142,12 +142,19 @@ class GenericIE(InfoExtractor): # Look for embedded Vimeo player mobj = re.search( - r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage) + r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage) if mobj: player_url = unescapeHTML(mobj.group(1)) surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl, 'Vimeo') + # Look for embedded YouTube player + mobj = re.search( + r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage) + if mobj: + surl = unescapeHTML(mobj.group(1)) + return self.url_result(surl, 'Youtube') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: From 82697fb2ab9524b426f9e0e5bc7c49aa3f86b47c Mon Sep 17 
00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 11:45:30 +0200 Subject: [PATCH 140/264] release 2013.10.18.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ddef5fa50..971530f8b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.18' +__version__ = '2013.10.18.1' From cce722b79ccbe0883a1fdda4f13fe7a3f9465462 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 11:50:48 +0200 Subject: [PATCH 141/264] Add metavar to --cache-dir --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cd642ce3b..47acb4320 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -196,7 +196,7 @@ def parseOpts(overrideArguments=None): general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( - '--cache-dir', dest='cachedir', default=get_cachedir(), + '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', help='Location in the filesystem where youtube-dl can store downloaded information permanently. 
By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', From f44415360e7bdf1b7b90c0c4b08199518210f009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 18 Oct 2013 13:49:25 +0200 Subject: [PATCH 142/264] Use the console_scripts entry point if setuptools is available --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3b6dc2d40..347a4f2d8 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,7 @@ import sys try: from setuptools import setup + setuptools_available = True except ImportError: from distutils.core import setup @@ -43,13 +44,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': params = py2exe_params else: params = { - 'scripts': ['bin/youtube-dl'], 'data_files': [ # Installing system-wide would require sudo... ('etc/bash_completion.d', ['youtube-dl.bash-completion']), ('share/doc/youtube_dl', ['README.txt']), ('share/man/man1/', ['youtube-dl.1']) ] } + if setuptools_available: + params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} + else: + params['scripts'] = ['bin/youtube-dl'] # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), From 16f36a6fc92d6ab89293e45de65475455fbc1b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br> Date: Fri, 18 Oct 2013 17:50:55 -0300 Subject: [PATCH 143/264] extractor: youtube: Set extension of AAC audio formats to m4a. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This, in particular, eases downloading both audio and videos in DASH formats before muxing them, which alleviates the problem that I exposed on issue Furthermore, one may argue that this is, indeed, the case for correctness's sake. 
Signed-off-by: Rogério Brito <rbrito@ime.usp.br> --- youtube_dl/extractor/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index fb7c42830..cfc142f26 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -237,9 +237,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '137': 'mp4', '138': 'mp4', '139': 'mp4', - '140': 'mp4', - '141': 'mp4', - '160': 'mp4', + '140': 'm4a', + '141': 'm4a', + '160': 'm4a', # Dash webm '171': 'webm', From fdefe96bf2bf93c7df590a145b4d7572eb1b1723 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 23:06:49 +0200 Subject: [PATCH 144/264] Document %(format)s (#1612) --- youtube_dl/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 47acb4320..bb59f257b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -332,7 +332,9 @@ def parseOpts(overrideArguments=None): help=('output filename template. Use %(title)s to get the title, ' '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(autonumber)s to get an automatically incremented number, ' - '%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), ' + '%(ext)s for the filename extension, ' + '%(format)s for the format description (like "22 - 1280x720" or "HD")' + '%(upload_date)s for the upload date (YYYYMMDD), ' '%(extractor)s for the provider (youtube, metacafe, etc), ' '%(id)s for the video id , %(playlist)s for the playlist the video is in, ' '%(playlist_index)s for the position in the playlist and %% for a literal percent. 
' From 41fd7c7e608ad656251ffdf55abc4c715a6efe12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 23:09:32 +0200 Subject: [PATCH 145/264] Add new option --abort-on-error --- youtube_dl/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bb59f257b..fce1adf0c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,6 +179,9 @@ def parseOpts(overrideArguments=None): action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) + general.add_option('--abort-on-error', + action='store_false', dest='ignoreerrors', + help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) From 3d2986063cbb688a840fea3334cddcd4067d2db7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 23:13:46 +0200 Subject: [PATCH 146/264] [bash-completion] Do not use dash in function name (Fixes #1623) --- devscripts/bash-completion.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in index bd10f63c2..ce893fcbe 100644 --- a/devscripts/bash-completion.in +++ b/devscripts/bash-completion.in @@ -1,4 +1,4 @@ -__youtube-dl() +__youtube_dl() { local cur prev opts COMPREPLY=() @@ -15,4 +15,4 @@ __youtube-dl() fi } -complete -F __youtube-dl youtube-dl +complete -F __youtube_dl youtube-dl From b186d949cf9d1cd6b723113cbef393d58e87ea3c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister 
<phihag@phihag.de> Date: Fri, 18 Oct 2013 23:22:54 +0200 Subject: [PATCH 147/264] release 2013.10.18.2 --- README.md | 24 ++++++++++++++---------- youtube_dl/version.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6dae0a580..2b8db0cfc 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ which means you can modify it, redistribute it or use it however you like. sudo if needed) -i, --ignore-errors continue on download errors, for example to to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error occurs --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access @@ -30,7 +32,7 @@ which means you can modify it, redistribute it or use it however you like. --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. - --cache-dir None Location in the filesystem where youtube-dl can + --cache-dir DIR Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache /youtube-dl . @@ -76,15 +78,17 @@ which means you can modify it, redistribute it or use it however you like. %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename - extension, %(upload_date)s for the upload date - (YYYYMMDD), %(extractor)s for the provider - (youtube, metacafe, etc), %(id)s for the video id - , %(playlist)s for the playlist the video is in, - %(playlist_index)s for the position in the - playlist and %% for a literal percent. Use - to - output to stdout. 
Can also be used to download to - a different directory, for example with -o '/my/d - ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . + extension, %(format)s for the format description + (like "22 - 1280x720" or "HD")%(upload_date)s for + the upload date (YYYYMMDD), %(extractor)s for the + provider (youtube, metacafe, etc), %(id)s for the + video id , %(playlist)s for the playlist the + video is in, %(playlist_index)s for the position + in the playlist and %% for a literal percent. Use + - to output to stdout. Can also be used to + download to a different directory, for example + with -o '/my/downloads/%(uploader)s/%(title)s-%(i + d)s.%(ext)s' . --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 971530f8b..e2cc6423d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.18.1' +__version__ = '2013.10.18.2' From d5594202aa3a3601a88e4b04826816a4fb997c74 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 23:34:55 +0200 Subject: [PATCH 148/264] Simplify release process --- devscripts/release.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 796468b4b..2766174c1 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -88,10 +88,6 @@ ROOT=$(pwd) "$ROOT/devscripts/gh-pages/update-sites.py" git add *.html *.html.in update git commit -m "release $version" - git show HEAD - read -p "Is it good, can I push? (y/n) " -n 1 - if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then exit 1; fi - echo git push "$ROOT" gh-pages git push "$ORIGIN_URL" gh-pages ) From 8e590a117f61bfb034ba6d181f5752b6977b4262 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Oct 2013 23:35:17 +0200 Subject: [PATCH 149/264] [xnxx] Add age_limit --- youtube_dl/extractor/xnxx.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 40d848900..8a0eb1afd 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -18,7 +18,8 @@ class XNXXIE(InfoExtractor): u'file': u'1135332.flv', u'md5': u'0831677e2b4761795f68d417e0b7b445', u'info_dict': { - u"title": u"lida \u00bb Naked Funny Actress (5)" + u"title": u"lida \u00bb Naked Funny Actress (5)", + u"age_limit": 18, } } @@ -50,4 +51,5 @@ class XNXXIE(InfoExtractor): 'ext': 'flv', 'thumbnail': video_thumbnail, 'description': None, + 'age_limit': 18, }] From f6f1fc9286be14aa8e6a6ccfce50e159b46ab489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br> Date: Fri, 18 Oct 2013 18:53:00 -0300 Subject: [PATCH 150/264] extractor: youtube: Fix extension of dash formats. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we are at it, separate the audio formats from the video formats. 
Signed-off-by: Rogério Brito <rbrito@ime.usp.br> --- youtube_dl/extractor/youtube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cfc142f26..96ead3310 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -236,10 +236,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '136': 'mp4', '137': 'mp4', '138': 'mp4', - '139': 'mp4', + '160': 'mp4', + + # Dash mp4 audio + '139': 'm4a', '140': 'm4a', '141': 'm4a', - '160': 'm4a', # Dash webm '171': 'webm', From 8ed6b34477367ff2900a0e8b3628056188598fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br> Date: Fri, 18 Oct 2013 19:32:37 -0300 Subject: [PATCH 151/264] extractor: Set age limit on some adult-related extractors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is similar in spirit to what was done in commit 8e590a117f. 
Signed-off-by: Rogério Brito <rbrito@ime.usp.br> --- youtube_dl/extractor/xvideos.py | 4 +++- youtube_dl/extractor/youporn.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index c3b9736d7..90138d7e5 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -13,7 +13,8 @@ class XVideosIE(InfoExtractor): u'file': u'939581.flv', u'md5': u'1d0c835822f0a71a7bf011855db929d0', u'info_dict': { - u"title": u"Funny Porns By >>>>S<<<<<< -1" + u"title": u"Funny Porns By >>>>S<<<<<< -1", + u"age_limit": 18, } } @@ -46,6 +47,7 @@ class XVideosIE(InfoExtractor): 'ext': 'flv', 'thumbnail': video_thumbnail, 'description': None, + 'age_limit': 18, } return [info] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index b1f93dd1b..e3b56cece 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -26,7 +26,8 @@ class YouPornIE(InfoExtractor): u"upload_date": u"20101221", u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", u"uploader": u"Ask Dan And Jennifer", - u"title": u"Sex Ed: Is It Safe To Masturbate Daily?" 
+ u"title": u"Sex Ed: Is It Safe To Masturbate Daily?", + u"age_limit": 18, } } From 284acd57d6e37a605e3651dce73bce89cba48390 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 19 Oct 2013 11:14:20 +0200 Subject: [PATCH 152/264] Add an author email --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3b6dc2d40..2391c5272 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ setup( ' YouTube.com and other video sites.', url='https://github.com/rg3/youtube-dl', author='Ricardo Garcia', + author_email='ytdl@yt-dl.org', maintainer='Philipp Hagemeister', maintainer_email='phihag@phihag.de', packages=['youtube_dl', 'youtube_dl.extractor'], From b0505eb6113ab6c02543d7b8272da39d8d57eff8 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sat, 19 Oct 2013 16:46:17 +0200 Subject: [PATCH 153/264] [CinemassacreIE] Fix information extraction --- youtube_dl/extractor/cinemassacre.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 6925b96c2..8260e8192 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -12,6 +12,7 @@ class CinemassacreIE(InfoExtractor): _TESTS = [{ u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', u'file': u'19911.flv', + u'md5': u'f9bb7ede54d1229c9846e197b4737e06', u'info_dict': { u'upload_date': u'20121110', u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', @@ -25,6 +26,7 @@ class CinemassacreIE(InfoExtractor): { u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', u'file': u'521be8ef82b16.flv', + u'md5': u'91b248e1e2473d5bff55d6010518111f', u'info_dict': { u'upload_date': u'20131002', u'title': u'The Mummy’s Hand (1940)', @@ -55,23 +57,29 @@ class CinemassacreIE(InfoExtractor): video_description = None playerdata = self._download_webpage(playerdata_url, 
video_id) - base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'', - playerdata, u'base_url') - base_url += '/Cinemassacre/' - # Important: The file names in playerdata are not used by the player and even wrong for some videos - sd_file = 'Cinemassacre-%s_high.mp4' % video_id - hd_file = 'Cinemassacre-%s.mp4' % video_id - video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id + url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') + player_url = self._html_search_regex(r'\'flashplayer\': \'(?P<player_url>[^\']+)\'', playerdata, u'player_url') + page_url = re.split(r'(?<=[^/])/([^/]|$)', player_url)[0] + + sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') + hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') + video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False) formats = [ { - 'url': base_url + sd_file, + 'url': url, + 'player_url': player_url, + 'page_url': page_url, + 'play_path': 'mp4:' + sd_file, 'ext': 'flv', 'format': 'sd', 'format_id': 'sd', }, { - 'url': base_url + hd_file, + 'url': url, + 'player_url': player_url, + 'page_url': page_url, + 'play_path': 'mp4:' + hd_file, 'ext': 'flv', 'format': 'hd', 'format_id': 'hd', From 50a6150ed9a0d5af39c27b5dba1e06deadd767cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br> Date: Sat, 19 Oct 2013 14:19:25 -0300 Subject: [PATCH 154/264] extractor: Set age limit on some adult-related extractors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More age limit of videos for adult-related sites. Note that, for redtube, I explicitly left the variable containing the age limit, since the comment justifying the age limit is a good thing to have. 
That being said, I included the age limit field on the test, to better reflect what the information extractor does (even if it may not break the automated tests). Signed-off-by: Rogério Brito <rbrito@ime.usp.br> --- youtube_dl/extractor/redtube.py | 3 ++- youtube_dl/extractor/xhamster.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 365aade56..994778e16 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -10,7 +10,8 @@ class RedTubeIE(InfoExtractor): u'file': u'66418.mp4', u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', u'info_dict': { - u"title": u"Sucked on a toilet" + u"title": u"Sucked on a toilet", + u"age_limit": 18, } } diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 361619694..f060d9066 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -19,7 +19,8 @@ class XHamsterIE(InfoExtractor): u'info_dict': { u"upload_date": u"20121014", u"uploader_id": u"Ruseful2011", - u"title": u"FemaleAgent Shy beauty takes the bait" + u"title": u"FemaleAgent Shy beauty takes the bait", + u"age_limit": 18, } }, { @@ -29,7 +30,8 @@ class XHamsterIE(InfoExtractor): u'info_dict': { u"upload_date": u"20130914", u"uploader_id": u"jojo747400", - u"title": u"Britney Spears Sexy Booty" + u"title": u"Britney Spears Sexy Booty", + u"age_limit": 18, } }] @@ -80,5 +82,6 @@ class XHamsterIE(InfoExtractor): 'description': video_description, 'upload_date': video_upload_date, 'uploader_id': video_uploader_id, - 'thumbnail': video_thumbnail + 'thumbnail': video_thumbnail, + 'age_limit': 18, }] From 9d92015d436af21b1a6ef00837a1984d76b259e1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 19 Oct 2013 21:09:48 +0200 Subject: [PATCH 155/264] [xhamster] Add support for age_limit (Instead of #1627) --- youtube_dl/extractor/xhamster.py | 15 ++++++++++----- 1 file 
changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 361619694..81c4be326 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -19,7 +19,8 @@ class XHamsterIE(InfoExtractor): u'info_dict': { u"upload_date": u"20121014", u"uploader_id": u"Ruseful2011", - u"title": u"FemaleAgent Shy beauty takes the bait" + u"title": u"FemaleAgent Shy beauty takes the bait", + u"age_limit": 18, } }, { @@ -27,9 +28,10 @@ class XHamsterIE(InfoExtractor): u'file': u'2221348.flv', u'md5': u'e767b9475de189320f691f49c679c4c7', u'info_dict': { - u"upload_date": u"20130914", - u"uploader_id": u"jojo747400", - u"title": u"Britney Spears Sexy Booty" + u"upload_date": u"20130914", + u"uploader_id": u"jojo747400", + u"title": u"Britney Spears Sexy Booty", + u"age_limit": 18, } }] @@ -72,6 +74,8 @@ class XHamsterIE(InfoExtractor): video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'', webpage, u'thumbnail', fatal=False) + age_limit = self._rta_search(webpage) + return [{ 'id': video_id, 'url': video_url, @@ -80,5 +84,6 @@ class XHamsterIE(InfoExtractor): 'description': video_description, 'upload_date': video_upload_date, 'uploader_id': video_uploader_id, - 'thumbnail': video_thumbnail + 'thumbnail': video_thumbnail, + 'age_limit': age_limit, }] From a9c58ad945e88e8eadbbff9c165c19b46805063b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 13:19:58 +0200 Subject: [PATCH 156/264] Accept requested formats to be in the format 35/best (closes #1552) The format selection code is now an independent function. 
--- test/test_YoutubeDL.py | 23 +++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 27 ++++++++++++++++++--------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index ba6dc05bc..2073bc4df 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -94,6 +94,29 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded[u'format_id'], u'excellent') + def test_format_selection(self): + formats = [ + {u'format_id': u'35'}, + {u'format_id': u'47'}, + {u'format_id': u'2'}, + ] + info_dict = {u'formats': formats, u'extractor': u'test'} + + ydl = YDL({'format': u'20/47'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'47') + + ydl = YDL({'format': u'20/71/worst'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'35') + + ydl = YDL() + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'2') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 296c0f992..bc69214e7 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -448,6 +448,17 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def select_format(self, format_spec, available_formats): + if format_spec == 'best' or format_spec is None: + return available_formats[-1] + elif format_spec == 'worst': + return available_formats[0] + else: + matches = list(filter(lambda f:f['format_id'] == format_spec ,available_formats)) + if matches: + return matches[-1] + return None + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -502,22 +513,20 @@ class YoutubeDL(object): formats = sorted(formats, 
key=_free_formats_key) req_format = self.params.get('format', 'best') + if req_format is None: + req_format = 'best' formats_to_download = [] - if req_format == 'best' or req_format is None: - formats_to_download = [formats[-1]] - elif req_format == 'worst': - formats_to_download = [formats[0]] # The -1 is for supporting YoutubeIE - elif req_format in ('-1', 'all'): + if req_format in ('-1', 'all'): formats_to_download = formats else: - # We can accept formats requestd in the format: 34/10/5, we pick + # We can accept formats requestd in the format: 34/5/best, we pick # the first that is available, starting from left req_formats = req_format.split('/') for rf in req_formats: - matches = filter(lambda f:f['format_id'] == rf ,formats) - if matches: - formats_to_download = [matches[0]] + selected_format = self.select_format(rf, formats) + if selected_format is not None: + formats_to_download = [selected_format] break if not formats_to_download: raise ExtractorError(u'requested format not available') From 49e86983e7639223644e1de2643745acc66f2535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 13:31:55 +0200 Subject: [PATCH 157/264] Allow to use the extension for the format selection The best format with the extension is downloaded. 
--- test/test_YoutubeDL.py | 17 ++++++++++++++--- youtube_dl/YoutubeDL.py | 7 ++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2073bc4df..f8cd1bdce 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -96,9 +96,10 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection(self): formats = [ - {u'format_id': u'35'}, - {u'format_id': u'47'}, - {u'format_id': u'2'}, + {u'format_id': u'35', u'ext': u'mp4'}, + {u'format_id': u'45', u'ext': u'webm'}, + {u'format_id': u'47', u'ext': u'webm'}, + {u'format_id': u'2', u'ext': u'flv'}, ] info_dict = {u'formats': formats, u'extractor': u'test'} @@ -117,6 +118,16 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], u'2') + ydl = YDL({'format': u'webm/mp4'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'47') + + ydl = YDL({'format': u'3gp/40/mp4'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], u'35') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bc69214e7..32f21e21a 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -454,7 +454,12 @@ class YoutubeDL(object): elif format_spec == 'worst': return available_formats[0] else: - matches = list(filter(lambda f:f['format_id'] == format_spec ,available_formats)) + extensions = [u'mp4', u'flv', u'webm', u'3gp'] + if format_spec in extensions: + filter_f = lambda f: f['ext'] == format_spec + else: + filter_f = lambda f: f['format_id'] == format_spec + matches = list(filter(filter_f ,available_formats)) if matches: return matches[-1] return None From 3fd39e37f2b767e4c66518a6fe0f620344c31825 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 13:52:24 +0200 Subject: [PATCH 158/264] YoutubeDL: remove method that came from FileDownloader --- youtube_dl/YoutubeDL.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 32f21e21a..a837971b0 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -234,19 +234,6 @@ class YoutubeDL(object): error_message = u'%s %s' % (_msg_header, message) self.trouble(error_message, tb) - def slow_down(self, start_time, byte_counter): - """Sleep if the download speed is over the rate limit.""" - rate_limit = self.params.get('ratelimit', None) - if rate_limit is None or byte_counter == 0: - return - now = time.time() - elapsed = now - start_time - if elapsed <= 0.0: - return - speed = float(byte_counter) / elapsed - if speed > rate_limit: - time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) - def report_writedescription(self, descfn): """ Report that the description file is being written """ self.to_screen(u'[info] Writing video description to: ' + descfn) From 8c51aa6506c85f3826b55c35a1ebf470bd4ebe3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 14:09:38 +0200 Subject: [PATCH 159/264] The 'format' field now defaults to '{format_id} - {width}x{height}{format_note}' Following the YoutubeIE format. The 'format_note' gives additional info about the format, for example '3D' or 'DASH video'. 
--- youtube_dl/YoutubeDL.py | 42 ++++++++++++++++++++++------------ youtube_dl/extractor/common.py | 5 +++- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a837971b0..5f70b6dac 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -473,17 +473,14 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for (i, format) in enumerate(formats): - if format.get('format') is None: - if format.get('height') is not None: - if format.get('width') is not None: - format_desc = u'%sx%s' % (format['width'], format['height']) - else: - format_desc = u'%sp' % format['height'] - else: - format_desc = '???' - format['format'] = format_desc if format.get('format_id') is None: format['format_id'] = compat_str(i) + if format.get('format') is None: + format['format'] = u'{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note = u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', + ) if self.params.get('listformats', None): self.list_formats(info_dict) @@ -753,16 +750,31 @@ class YoutubeDL(object): with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + u'\n') + @staticmethod + def format_resolution(format): + if format.get('height') is not None: + if format.get('width') is not None: + res = u'%sx%s' % (format['width'], format['height']) + else: + res = u'%sp' % format['height'] + else: + res = '???' 
+ return res + def list_formats(self, info_dict): formats_s = [] for format in info_dict.get('formats', [info_dict]): - formats_s.append("%s\t:\t%s\t[%s]" % (format['format_id'], - format['ext'], - format.get('format', '???'), - ) - ) + formats_s.append(u'%-15s: %-5s %-15s[%s]' % ( + format['format_id'], + format['ext'], + format.get('format_note') or '-', + self.format_resolution(format), + ) + ) if len(formats_s) != 1: formats_s[0] += ' (worst)' formats_s[-1] += ' (best)' formats_s = "\n".join(formats_s) - self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s)) + self.to_screen(u'[info] Available formats for %s:\n' + u'format code extension note resolution\n%s' % ( + info_dict['id'], formats_s)) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d4af3b5eb..7d7ce5d98 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -61,9 +61,12 @@ class InfoExtractor(object): * ext Will be calculated from url if missing * format A human-readable description of the format ("mp4 container with h264/opus"). - Calculated from width and height if missing. + Calculated from the format_id, width, height + and format_note fields if missing. 
* format_id A short description of the format ("mp4_h264_opus" or "19") + * format_note Additional info about the format + ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known From 182a107877786516f612535a9ad8db5d7487e9e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 14:40:06 +0200 Subject: [PATCH 160/264] [arte] Set the format_note and the format_id fields (closes #1628) --- youtube_dl/extractor/arte.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 5ee8a67b1..d8e32f187 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -174,12 +174,27 @@ class ArteTVPlus7IE(InfoExtractor): # Some formats use the m3u8 protocol formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) # We order the formats by quality - formats = sorted(formats, key=lambda f: int(f.get('height',-1))) + formats = list(formats) # in python3 filter returns an iterator + if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: + sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) + else: + sort_key = lambda f: int(f.get('height',-1)) + formats = sorted(formats, key=sort_key) # Prefer videos without subtitles in the same language formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None) # Pick the best quality def _format(format_info): + quality = format_info['quality'] + m_quality = re.match(r'\w*? 
- (\d*)p', quality) + if m_quality is not None: + quality = m_quality.group(1) + if format_info.get('versionCode') is not None: + format_id = u'%s-%s' % (quality, format_info['versionCode']) + else: + format_id = quality info = { + 'format_id': format_id, + 'format_note': format_info.get('versionLibelle'), 'width': format_info.get('width'), 'height': format_info.get('height'), } @@ -192,8 +207,6 @@ class ArteTVPlus7IE(InfoExtractor): info['ext'] = determine_ext(info['url']) return info info_dict['formats'] = [_format(f) for f in formats] - # TODO: Remove when #980 has been merged - info_dict.update(info_dict['formats'][-1]) return info_dict From 685a9cd2f15d4e3b7d18df7ac639e7daf1a84483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 15:00:21 +0200 Subject: [PATCH 161/264] [googleplus] Fix upload_date extraction --- youtube_dl/extractor/googleplus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index ab12d7e93..2570746b2 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -41,9 +41,9 @@ class GooglePlusIE(InfoExtractor): # Extract update date upload_date = self._html_search_regex( - r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*> + r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*> ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', - webpage, u'upload date', fatal=False) + webpage, u'upload date', fatal=False, flags=re.VERBOSE) if upload_date: # Convert timestring to a format suitable for filename upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d") From cbbd9a9c6984ed42f55b13fbb97a3d7f98f77aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 15:07:33 +0200 Subject: [PATCH 162/264] Fix the duration field for the 
VideoDetective and InternetVideoArchive tests Also remove the use of the old format system and the comment --- youtube_dl/extractor/internetvideoarchive.py | 7 ++----- youtube_dl/extractor/videodetective.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 5986459d6..be8e05f53 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -19,7 +19,7 @@ class InternetVideoArchiveIE(InfoExtractor): u'info_dict': { u'title': u'SKYFALL', u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', - u'duration': 156, + u'duration': 153, }, } @@ -74,7 +74,7 @@ class InternetVideoArchiveIE(InfoExtractor): }) formats = sorted(formats, key=lambda f: f['bitrate']) - info = { + return { 'id': video_id, 'title': item.find('title').text, 'formats': formats, @@ -82,6 +82,3 @@ class InternetVideoArchiveIE(InfoExtractor): 'description': item.find('description').text, 'duration': int(attr['duration']), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index d89f84094..265dd5b91 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -16,7 +16,7 @@ class VideoDetectiveIE(InfoExtractor): u'info_dict': { u'title': u'KICK-ASS 2', u'description': u'md5:65ba37ad619165afac7d432eaded6013', - u'duration': 138, + u'duration': 135, }, } From f6a54188c230d76a659595640850b093f1d06b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Oct 2013 16:28:55 +0200 Subject: [PATCH 163/264] [youtube] Use 'node is None' when checking if the 
video has automatic captions It had stopped working and it reports a FutureWarning --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index fb7c42830..a88cba2b4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1150,7 +1150,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): list_page = self._download_webpage(list_url, video_id) caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8')) original_lang_node = caption_list.find('track') - if not original_lang_node or original_lang_node.attrib.get('kind') != 'asr' : + if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : self._downloader.report_warning(u'Video doesn\'t have automatic captions') return {} original_lang = original_lang_node.attrib['lang_code'] From a6387bfd3cc65d8780bb4d6272b51c6268b45988 Mon Sep 17 00:00:00 2001 From: Joshua Elsasser <joshua@elsasser.org> Date: Fri, 5 Jul 2013 09:10:57 -0700 Subject: [PATCH 164/264] [vimeo] Implement the new format selection system (closes PR #996) Rebased and deleted some parts to use the new system instead of copying the one from YoutubeIE --- youtube_dl/extractor/vimeo.py | 49 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2de56ac81..1125513c7 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -179,46 +179,45 @@ class VimeoIE(InfoExtractor): # Vimeo specific: extract video codec and quality information # First consider quality, then codecs, then take everything - # TODO bind to format param - codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] + codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] files = { 'hd': [], 'sd': [], 'other': []} config_files = config["video"].get("files") or 
config["request"].get("files") for codec_name, codec_extension in codecs: - if codec_name in config_files: - if 'hd' in config_files[codec_name]: - files['hd'].append((codec_name, codec_extension, 'hd')) - elif 'sd' in config_files[codec_name]: - files['sd'].append((codec_name, codec_extension, 'sd')) + for quality in config_files.get(codec_name, []): + format_id = '-'.join((codec_name, quality)).lower() + key = quality if quality in files else 'other' + video_url = None + if isinstance(config_files[codec_name], dict): + file_info = config_files[codec_name][quality] + video_url = file_info.get('url') else: - files['other'].append((codec_name, codec_extension, config_files[codec_name][0])) + file_info = {} + if video_url is None: + video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ + %(video_id, sig, timestamp, quality, codec_name.upper()) - for quality in ('hd', 'sd', 'other'): - if len(files[quality]) > 0: - video_quality = files[quality][0][2] - video_codec = files[quality][0][0] - video_extension = files[quality][0][1] - self.to_screen(u'%s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality)) - break - else: + files[key].append({ + 'ext': codec_extension, + 'url': video_url, + 'format_id': format_id, + 'width': file_info.get('width'), + 'height': file_info.get('height'), + }) + formats = [] + for key in ('other', 'sd', 'hd'): + formats += files[key] + if len(formats) == 0: raise ExtractorError(u'No known codec found') - video_url = None - if isinstance(config_files[video_codec], dict): - video_url = config_files[video_codec][video_quality].get("url") - if video_url is None: - video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ - %(video_id, sig, timestamp, video_quality, video_codec.upper()) - return [{ 'id': video_id, - 'url': video_url, 'uploader': 
video_uploader, 'uploader_id': video_uploader_id, 'upload_date': video_upload_date, 'title': video_title, - 'ext': video_extension, 'thumbnail': video_thumbnail, 'description': video_description, + 'formats': formats, }] From 12893efe01b88a72595f50cbc692d308419001c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 22 Oct 2013 00:01:59 +0200 Subject: [PATCH 165/264] Respect the download parameter in YoutubeDL.process_video_result if the extractor handle the format selection --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5f70b6dac..577e27b11 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -461,7 +461,8 @@ class YoutubeDL(object): # This extractors handle format selection themselves if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']: - self.process_info(info_dict) + if download: + self.process_info(info_dict) return info_dict # We now pick which formats have to be downloaded From fe7e0c982530e62f3af001f7ac50c8c4b4abb5cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 22 Oct 2013 14:49:34 +0200 Subject: [PATCH 166/264] Style fixes in YoutubeDL.py Fixed some of the problems reported by pep8 --- youtube_dl/YoutubeDL.py | 58 ++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 577e27b11..f9a6da520 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -91,7 +91,7 @@ class YoutubeDL(object): downloadarchive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. 
- + The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, @@ -216,10 +216,10 @@ class YoutubeDL(object): If stderr is a tty file the 'WARNING:' will be colored ''' if sys.stderr.isatty() and os.name != 'nt': - _msg_header=u'\033[0;33mWARNING:\033[0m' + _msg_header = u'\033[0;33mWARNING:\033[0m' else: - _msg_header=u'WARNING:' - warning_message=u'%s %s' % (_msg_header,message) + _msg_header = u'WARNING:' + warning_message = u'%s %s' % (_msg_header, message) self.to_stderr(warning_message) def report_error(self, message, tb=None): @@ -275,11 +275,11 @@ class YoutubeDL(object): if template_dict['playlist_index'] is not None: template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] - sanitize = lambda k,v: sanitize_filename( + sanitize = lambda k, v: sanitize_filename( u'NA' if v is None else compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k==u'id')) - template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) + is_id=k == u'id') + template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items()) filename = self.params['outtmpl'] % template_dict return filename @@ -315,14 +315,14 @@ class YoutubeDL(object): return (u'%(title)s has already been recorded in archive' % info_dict) return None - + def extract_info(self, url, download=True, ie_key=None, extra_info={}): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. 
extra_info is a dict containing the extra values to add to each result ''' - + if ie_key: ies = [self.get_info_extractor(ie_key)] else: @@ -364,7 +364,7 @@ class YoutubeDL(object): raise else: self.report_error(u'no suitable InfoExtractor: %s' % url) - + def process_ie_result(self, ie_result, download=True, extra_info={}): """ Take the result of the ie(may be modified) and resolve all unresolved @@ -388,7 +388,7 @@ class YoutubeDL(object): elif result_type == 'playlist': # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) - self.to_screen(u'[download] Downloading playlist: %s' % playlist) + self.to_screen(u'[download] Downloading playlist: %s' % playlist) playlist_results = [] @@ -406,12 +406,12 @@ class YoutubeDL(object): self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) - for i,entry in enumerate(entries,1): - self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) + for i, entry in enumerate(entries, 1): + self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries)) extra = { - 'playlist': playlist, - 'playlist_index': i + playliststart, - } + 'playlist': playlist, + 'playlist_index': i + playliststart, + } if not 'extractor' in entry: # We set the extractor, if it's an url it will be set then to # the new extractor, but if it's already a video we must make @@ -446,7 +446,7 @@ class YoutubeDL(object): filter_f = lambda f: f['ext'] == format_spec else: filter_f = lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f ,available_formats)) + matches = list(filter(filter_f, available_formats)) if matches: return matches[-1] return None @@ -480,7 +480,7 @@ class YoutubeDL(object): format['format'] = u'{id} - {res}{note}'.format( id=format['format_id'], res=self.format_resolution(format), - note = u' ({})'.format(format['format_note']) if format.get('format_note') 
is not None else '', + note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', ) if self.params.get('listformats', None): @@ -607,20 +607,20 @@ class YoutubeDL(object): if self.params.get('writeannotations', False): try: - annofn = filename + u'.annotations.xml' - self.report_writeannotations(annofn) - with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: - annofile.write(info_dict['annotations']) + annofn = filename + u'.annotations.xml' + self.report_writeannotations(annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning(u'There are no annotations to write.') except (OSError, IOError): - self.report_error(u'Cannot write annotations file: ' + annofn) - return + self.report_error(u'Cannot write annotations file: ' + annofn) + return subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) - if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: + if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['subtitles'] @@ -642,7 +642,7 @@ class YoutubeDL(object): infofn = filename + u'.info.json' self.report_writeinfojson(infofn) try: - json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) + json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle']) write_json_file(json_info_dict, encodeFilename(infofn)) except (OSError, IOError): self.report_error(u'Cannot write metadata to JSON file ' + infofn) @@ -712,7 +712,7 @@ class YoutubeDL(object): keep_video = None for pp in self._pps: try: - keep_video_wish,new_info = pp.run(info) + keep_video_wish, new_info = pp.run(info) if 
keep_video_wish is not None: if keep_video_wish: keep_video = keep_video_wish @@ -759,7 +759,7 @@ class YoutubeDL(object): else: res = u'%sp' % format['height'] else: - res = '???' + res = '???' return res def list_formats(self, info_dict): @@ -773,7 +773,7 @@ class YoutubeDL(object): ) ) if len(formats_s) != 1: - formats_s[0] += ' (worst)' + formats_s[0] += ' (worst)' formats_s[-1] += ' (best)' formats_s = "\n".join(formats_s) self.to_screen(u'[info] Available formats for %s:\n' From ce68b5907ca3999fb3e3f0d9bcfbaa7823c46623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 22 Oct 2013 21:01:16 +0200 Subject: [PATCH 167/264] [nhl:videocenter] Fix playlist title extraction --- youtube_dl/extractor/nhl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index e8d43dd13..224f56ac8 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -90,8 +90,8 @@ class NHLVideocenterIE(NHLBaseInfoExtractor): r'{statusIndex:0,index:0,.*?id:(.*?),'], webpage, u'category id') playlist_title = self._html_search_regex( - r'\?catid=%s">(.*?)</a>' % cat_id, - webpage, u'playlist title', flags=re.DOTALL) + r'tab0"[^>]*?>(.*?)</td>', + webpage, u'playlist title', flags=re.DOTALL).lower().capitalize() data = compat_urllib_parse.urlencode({ 'cid': cat_id, From b028e96144a2bf1ba84dd6d11f1cc13a2928c438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 22 Oct 2013 21:06:06 +0200 Subject: [PATCH 168/264] [arte.tv:creative] Update the title of the test --- youtube_dl/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index d8e32f187..d39b48951 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -220,7 +220,7 @@ 
class ArteTVCreativeIE(ArteTVPlus7IE): u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', u'file': u'050489-002.mp4', u'info_dict': { - u'title': u'Agentur Amateur #2 - Corporate Design', + u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design', }, } From 586a91b67f6fe7254beefc3831b4e4649f84f0ce Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 22 Oct 2013 22:28:19 +0200 Subject: [PATCH 169/264] Expand tilde in template (Fixes #1639) --- youtube_dl/YoutubeDL.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 296c0f992..095692bb3 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -288,13 +288,15 @@ class YoutubeDL(object): if template_dict['playlist_index'] is not None: template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] - sanitize = lambda k,v: sanitize_filename( + sanitize = lambda k, v: sanitize_filename( u'NA' if v is None else compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k==u'id')) - template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) + is_id=(k == u'id')) + template_dict = dict((k, sanitize(k, v)) + for k, v in template_dict.items()) - filename = self.params['outtmpl'] % template_dict + tmpl = os.path.expanduser(self.params['outtmpl']) + filename = tmpl % template_dict return filename except KeyError as err: self.report_error(u'Erroneous output template') From 80f55a951185de7414d31e26a3981f01cd597596 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 22 Oct 2013 22:35:13 +0200 Subject: [PATCH 170/264] release 2013.10.22 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e2cc6423d..3482e6089 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = 
'2013.10.18.2' +__version__ = '2013.10.22' From df1c39ec5ca16237b693fc13cd7e17d86af4f76f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 23 Oct 2013 00:07:27 +0200 Subject: [PATCH 171/264] release 2013.10.23 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3482e6089..a5b56d894 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.22' +__version__ = '2013.10.23' From 71907db3ba28b1d32c3294d9e3bec0c08fb98ad3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 23 Oct 2013 11:38:51 +0200 Subject: [PATCH 172/264] [vimeo] Fix normal videos (Fixes #1642) Vimeo Pro Videos are still broken --- youtube_dl/extractor/vimeo.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1125513c7..bf48671b3 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -25,7 +25,7 @@ class VimeoIE(InfoExtractor): { u'url': u'http://vimeo.com/56015672', u'file': u'56015672.mp4', - u'md5': u'8879b6cc097e987f02484baf890129e5', + u'md5': u'ae7a1d8b183758a0506b0622f37dfa14', u'info_dict': { u"upload_date": u"20121220", u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", @@ -129,10 +129,11 @@ class VimeoIE(InfoExtractor): # Extract the config JSON try: - config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], - webpage, u'info section', flags=re.DOTALL) - config = json.loads(config) - except: + config_url = self._html_search_regex( + r' data-config-url="(.+?)"', webpage, u'config URL') + config_json = self._download_webpage(config_url, video_id) + config = json.loads(config_json) + except Exception as e: if re.search('The creator of this 
video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') @@ -140,7 +141,8 @@ class VimeoIE(InfoExtractor): self._verify_video_password(url, video_id, webpage) return self._real_extract(url) else: - raise ExtractorError(u'Unable to extract info section') + raise ExtractorError(u'Unable to extract info section', + cause=e) # Extract title video_title = config["video"]["title"] From 55b3e45bbab3af5132d45c8f3f8f19fae5f5f1d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 23 Oct 2013 14:38:03 +0200 Subject: [PATCH 173/264] [vimeo] Fix pro videos and player.vimeo.com urls The old process can still be used for those videos. Added RegexNotFoundError, which is raised by _search_regex if it can't extract the info. --- youtube_dl/extractor/common.py | 5 +++-- youtube_dl/extractor/vimeo.py | 6 ++++++ youtube_dl/utils.py | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7d7ce5d98..aaa5c24c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ from ..utils import ( clean_html, compiled_regex_type, ExtractorError, + RegexNotFoundError, unescapeHTML, ) @@ -231,7 +232,7 @@ class InfoExtractor(object): Perform a regex search on the given string, using a single or a list of patterns returning the first matching group. In case of failure return a default value or raise a WARNING or a - ExtractorError, depending on fatal, specifying the field name. + RegexNotFoundError, depending on fatal, specifying the field name. 
""" if isinstance(pattern, (str, compat_str, compiled_regex_type)): mobj = re.search(pattern, string, flags) @@ -251,7 +252,7 @@ class InfoExtractor(object): elif default is not None: return default elif fatal: - raise ExtractorError(u'Unable to extract %s' % _name) + raise RegexNotFoundError(u'Unable to extract %s' % _name) else: self._downloader.report_warning(u'unable to extract %s; ' u'please report this issue on http://yt-dl.org/bug' % _name) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index bf48671b3..ad2f75d6b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -10,6 +10,7 @@ from ..utils import ( clean_html, get_element_by_attribute, ExtractorError, + RegexNotFoundError, std_headers, unsmuggle_url, ) @@ -133,6 +134,11 @@ class VimeoIE(InfoExtractor): r' data-config-url="(.+?)"', webpage, u'config URL') config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) + except RegexNotFoundError: + # For pro videos or player.vimeo.com urls + config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], + webpage, u'info section', flags=re.DOTALL) + config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bfb8f6bcd..1d9785341 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -572,6 +572,11 @@ class ExtractorError(Exception): return u''.join(traceback.format_tb(self.traceback)) +class RegexNotFoundError(ExtractorError): + """Error when a regex didn't match""" + pass + + class DownloadError(Exception): """Download Error exception. 
From 0a89b2852e927914ecd5643d956449a4841a3141 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 23 Oct 2013 15:12:33 +0200 Subject: [PATCH 174/264] release 2013.10.23.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a5b56d894..df6002970 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.23' +__version__ = '2013.10.23.1' From 93b22c7828911668c503e868d6be053e8a0deb7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 23 Oct 2013 16:31:53 +0200 Subject: [PATCH 175/264] [vimeo] fix the extraction for videos protected with password Added a test video. --- youtube_dl/extractor/vimeo.py | 39 +++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ad2f75d6b..ef90fecc0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,3 +1,4 @@ +# encoding: utf-8 import json import re import itertools @@ -55,7 +56,22 @@ class VimeoIE(InfoExtractor): u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', u'uploader': u'The BLN & Business of Software', }, - } + }, + { + u'url': u'http://vimeo.com/68375962', + u'file': u'68375962.mp4', + u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7', + u'note': u'Video protected with password', + u'info_dict': { + u'title': u'youtube-dl password protected test video', + u'upload_date': u'20130614', + u'uploader_id': u'user18948128', + u'uploader': u'Jaime Marquínez Ferrándiz', + }, + u'params': { + u'videopassword': u'youtube-dl', + }, + }, ] def _login(self): @@ -130,20 +146,21 @@ class VimeoIE(InfoExtractor): # Extract the config JSON try: - config_url = self._html_search_regex( - r' data-config-url="(.+?)"', webpage, u'config 
URL') - config_json = self._download_webpage(config_url, video_id) - config = json.loads(config_json) - except RegexNotFoundError: - # For pro videos or player.vimeo.com urls - config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], - webpage, u'info section', flags=re.DOTALL) - config = json.loads(config) + try: + config_url = self._html_search_regex( + r' data-config-url="(.+?)"', webpage, u'config URL') + config_json = self._download_webpage(config_url, video_id) + config = json.loads(config_json) + except RegexNotFoundError: + # For pro videos or player.vimeo.com urls + config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], + webpage, u'info section', flags=re.DOTALL) + config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') - if re.search('If so please provide the correct password.', webpage): + if re.search('<form[^>]+?id="pw_form"', webpage) is not None: self._verify_video_password(url, video_id, webpage) return self._real_extract(url) else: From 3126050c0fe204dfb2669f794097648b9c9fa8c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 23 Oct 2013 16:32:17 +0200 Subject: [PATCH 176/264] Hide the video password on verbose mode --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index fce1adf0c..c141dcdda 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -133,7 +133,7 @@ def parseOpts(overrideArguments=None): def _hide_login_info(opts): opts = list(opts) - for private_opt in ['-p', '--password', '-u', '--username']: + for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: try: i = 
opts.index(private_opt) opts[i+1] = '<PRIVATE>' From 2450bcb28b46a6cb3d9f9accfdfd3ef6b7ac5f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 23 Oct 2013 17:00:33 +0200 Subject: [PATCH 177/264] [nowvideo] Fix key extraction Extract it from the embed page --- youtube_dl/extractor/nowvideo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py index ab52ad401..241cc160b 100644 --- a/youtube_dl/extractor/nowvideo.py +++ b/youtube_dl/extractor/nowvideo.py @@ -20,7 +20,10 @@ class NowVideoIE(InfoExtractor): video_id = mobj.group('id') webpage_url = 'http://www.nowvideo.ch/video/' + video_id + embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id webpage = self._download_webpage(webpage_url, video_id) + embed_page = self._download_webpage(embed_url, video_id, + u'Downloading embed page') self.report_extraction(video_id) @@ -28,7 +31,7 @@ class NowVideoIE(InfoExtractor): webpage, u'video title') video_key = self._search_regex(r'var fkzd="(.*)";', - webpage, u'video key') + embed_page, u'video key') api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key) api_response = self._download_webpage(api_call, video_id, From cdec0190c48b90465c5340f7d8af1370dae2cc67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 23 Oct 2013 17:33:38 +0200 Subject: [PATCH 178/264] [dailymotion] Extract all the available formats (closes #1028) --- youtube_dl/extractor/dailymotion.py | 41 +++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 7d8353946..4c0488245 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -28,6 
+28,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' IE_NAME = u'dailymotion' + + _FORMATS = [ + (u'stream_h264_ld_url', u'ld'), + (u'stream_h264_url', u'standard'), + (u'stream_h264_hq_url', u'hq'), + (u'stream_h264_hd_url', u'hd'), + (u'stream_h264_hd1080_url', u'hd180'), + ] + _TESTS = [ { u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', @@ -60,7 +69,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): video_id = mobj.group(1).split('_')[0].split('?')[0] - video_extension = 'mp4' url = 'http://www.dailymotion.com/video/%s' % video_id # Retrieve video webpage to extract further information @@ -99,18 +107,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] raise ExtractorError(msg, expected=True) - # TODO: support choosing qualities - - for key in ['stream_h264_hd1080_url','stream_h264_hd_url', - 'stream_h264_hq_url','stream_h264_url', - 'stream_h264_ld_url']: - if info.get(key):#key in info and info[key]: - max_quality = key - self.to_screen(u'Using %s' % key) - break - else: + formats = [] + for (key, format_id) in self._FORMATS: + video_url = info.get(key) + if video_url is not None: + m_size = re.search(r'H264-(\d+)x(\d+)', video_url) + if m_size is not None: + width, height = m_size.group(1), m_size.group(2) + else: + width, height = None, None + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': format_id, + 'width': width, + 'height': height, + }) + if not formats: raise ExtractorError(u'Unable to extract video URL') - video_url = info[max_quality] # subtitles video_subtitles = self.extract_subtitles(video_id) @@ -120,11 +134,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): return [{ 'id': video_id, - 'url': 
video_url, + 'formats': formats, 'uploader': video_uploader, 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), - 'ext': video_extension, 'subtitles': video_subtitles, 'thumbnail': info['thumbnail_url'] }] From 1cf64ee4685e0f26b2d4dc28d9635351a36007b6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 23 Oct 2013 18:38:09 +0200 Subject: [PATCH 179/264] release 2013.10.23.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index df6002970..b4ce6068f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.23.1' +__version__ = '2013.10.23.2' From fcc28edb2f86bb62ab8b3fcbacf0818991cd3058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 23 Oct 2013 20:21:25 +0200 Subject: [PATCH 180/264] [cinemassacre] Simplify * Remove some rtmp parameters that are not needed. * Remove the md5 checksums, the video is not downloaded. * Remove the code used before the current format system. 
--- youtube_dl/extractor/cinemassacre.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 8260e8192..2fe1033f0 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -12,7 +12,6 @@ class CinemassacreIE(InfoExtractor): _TESTS = [{ u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', u'file': u'19911.flv', - u'md5': u'f9bb7ede54d1229c9846e197b4737e06', u'info_dict': { u'upload_date': u'20121110', u'title': u'“Angry Video Game Nerd: The Movie” – Trailer', @@ -26,7 +25,6 @@ class CinemassacreIE(InfoExtractor): { u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', u'file': u'521be8ef82b16.flv', - u'md5': u'91b248e1e2473d5bff55d6010518111f', u'info_dict': { u'upload_date': u'20131002', u'title': u'The Mummy’s Hand (1940)', @@ -58,8 +56,6 @@ class CinemassacreIE(InfoExtractor): playerdata = self._download_webpage(playerdata_url, video_id) url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url') - player_url = self._html_search_regex(r'\'flashplayer\': \'(?P<player_url>[^\']+)\'', playerdata, u'player_url') - page_url = re.split(r'(?<=[^/])/([^/]|$)', player_url)[0] sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file') hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file') @@ -68,8 +64,6 @@ class CinemassacreIE(InfoExtractor): formats = [ { 'url': url, - 'player_url': player_url, - 'page_url': page_url, 'play_path': 'mp4:' + sd_file, 'ext': 'flv', 'format': 'sd', @@ -77,8 +71,6 @@ class CinemassacreIE(InfoExtractor): }, { 'url': url, - 'player_url': player_url, - 'page_url': page_url, 'play_path': 'mp4:' + hd_file, 'ext': 'flv', 'format': 'hd', @@ -86,7 +78,7 @@ class CinemassacreIE(InfoExtractor): }, ] - info = { + return { 'id': video_id, 'title': video_title, 'formats': 
formats, @@ -94,6 +86,3 @@ class CinemassacreIE(InfoExtractor): 'upload_date': video_date, 'thumbnail': video_thumbnail, } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info From 00fe14fc758173840f813b339960681e8e7d29d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 25 Oct 2013 16:52:58 +0200 Subject: [PATCH 181/264] [youtube] Also use the 'adaptative_fmts' field from the /get_video_info page (fixes #1649) The 'adaptative_fmts' field from the video page is not added to the 'url_encoded_fmt_stream_map' --- youtube_dl/extractor/youtube.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7a7bbe265..8fb07d100 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1405,32 +1405,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # this signatures are encrypted if 'url_encoded_fmt_stream_map' not in args: raise ValueError(u'No stream_map present') # caught below - m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) + re_signature = re.compile(r'[&,]s=') + m_s = re_signature.search(args['url_encoded_fmt_stream_map']) if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' 
% video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] - m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) + m_s = re_signature.search(args.get('adaptive_fmts', u'')) if m_s is not None: - if 'url_encoded_fmt_stream_map' in video_info: - video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts'] + if 'adaptive_fmts' in video_info: + video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts'] else: - video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']] - elif 'adaptive_fmts' in video_info: - if 'url_encoded_fmt_stream_map' in video_info: - video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0] - else: - video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts'] + video_info['adaptive_fmts'] = [args['adaptive_fmts']] except ValueError: pass if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] - elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: - if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]: + elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: + encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] + if 'rtmpe%3Dyes' in encoded_url_map: raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) url_map = {} - for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): + for url_data_str in encoded_url_map.split(','): url_data = compat_parse_qs(url_data_str) if 'itag' in url_data and 'url' in url_data: url = url_data['url'][0] From ea32fbacc8939e94f7db9c9a5eb167ada2af5f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= 
<jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 25 Oct 2013 16:55:37 +0200 Subject: [PATCH 182/264] Fix the extensions of two tests with youtube videos The best quality is now a mp4 video. --- youtube_dl/extractor/metacafe.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index e537648ff..234b9e80f 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -23,7 +23,7 @@ class MetacafeIE(InfoExtractor): _TESTS = [{ u"add_ie": ["Youtube"], u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", - u"file": u"_aUehQsCQtM.flv", + u"file": u"_aUehQsCQtM.mp4", u"info_dict": { u"upload_date": u"20090102", u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!", diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8fb07d100..2884b359c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -348,7 +348,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): }, { u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U", - u"file": u"1ltcDfZMA3U.flv", + u"file": u"1ltcDfZMA3U.mp4", u"note": u"Test VEVO video (#897)", u"info_dict": { u"upload_date": u"20070518", From 600cc1a4f0503651e4fd94af967d25dab3645859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 25 Oct 2013 17:11:29 +0200 Subject: [PATCH 183/264] [youtube] Set the format_id field to the itag of the format (closes #1624) --- youtube_dl/extractor/youtube.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2884b359c..d05d0a8c1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1480,13 +1480,13 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') results = [] - for format_param, video_real_url in video_url_list: + for itag, video_real_url in video_url_list: # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + video_extension = self._video_extensions.get(itag, 'flv') - video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, - self._video_dimensions.get(format_param, '???'), - ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '') + video_format = '{0} - {1}{2}'.format(itag if itag else video_extension, + self._video_dimensions.get(itag, '???'), + ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '') results.append({ 'id': video_id, @@ -1497,6 +1497,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'title': video_title, 'ext': video_extension, 'format': video_format, + 'format_id': itag, 'thumbnail': video_thumbnail, 'description': video_description, 'player_url': player_url, From b5936c0059eae236cfc0b53fadf6bc24f8f8f3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 25 Oct 2013 17:17:23 +0200 Subject: [PATCH 184/264] Document the %(format_id)s field for the output template --- youtube_dl/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c141dcdda..a33dec785 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -336,7 +336,8 @@ def parseOpts(overrideArguments=None): '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(autonumber)s to get an automatically incremented number, ' '%(ext)s for the filename extension, ' - '%(format)s for the format description (like "22 - 1280x720" or "HD")' 
+ '%(format)s for the format description (like "22 - 1280x720" or "HD"),' + '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),' '%(upload_date)s for the upload date (YYYYMMDD), ' '%(extractor)s for the provider (youtube, metacafe, etc), ' '%(id)s for the video id , %(playlist)s for the playlist the video is in, ' From 49a25557b082a147c875015ceeecb370671f025c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 25 Oct 2013 23:46:18 +0200 Subject: [PATCH 185/264] [8tracks] Use track count instead of looking at at_last_track property This fixes the error: $ youtube-dl http://8tracks.com/vladmc/counting-stars [8tracks] counting-stars: Downloading webpage [8tracks] counting-stars: Downloading song information 1/4 [8tracks] counting-stars: Downloading song information 2/4 [8tracks] counting-stars: Downloading song information 3/4 [8tracks] counting-stars: Downloading song information 4/4 [8tracks] counting-stars: Downloading song information 5/4 Traceback (most recent call last): File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main "__main__", fname, loader, pkg_name) File "/usr/lib/python2.7/runpy.py", line 72, in _run_code exec code in run_globals File "/home/phihag/projects/youtube-dl/youtube_dl/__main__.py", line 18, in <module> youtube_dl.main() File "/home/phihag/projects/youtube-dl/youtube_dl/__init__.py", line 761, in main _real_main(argv) File "/home/phihag/projects/youtube-dl/youtube_dl/__init__.py", line 714, in _real_main retcode = ydl.download(all_urls) File "/home/phihag/projects/youtube-dl/youtube_dl/YoutubeDL.py", line 701, in download videos = self.extract_info(url) File "/home/phihag/projects/youtube-dl/youtube_dl/YoutubeDL.py", line 342, in extract_info ie_result = ie.extract(url) File "/home/phihag/projects/youtube-dl/youtube_dl/extractor/common.py", line 121, in extract return self._real_extract(url) File "/home/phihag/projects/youtube-dl/youtube_dl/extractor/eighttracks.py", 
line 111, in _real_extract 'id': track_data['id'], KeyError: 'id' --- youtube_dl/extractor/eighttracks.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index cced06811..2cfbcd363 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -101,7 +101,7 @@ class EightTracksIE(InfoExtractor): first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) next_url = first_url res = [] - for i in itertools.count(): + for i in range(track_count): api_json = self._download_webpage(next_url, playlist_id, note=u'Downloading song information %s/%s' % (str(i+1), track_count), errnote=u'Failed to download song information') @@ -116,7 +116,5 @@ class EightTracksIE(InfoExtractor): 'ext': 'm4a', } res.append(info) - if api_data['set']['at_last_track']: - break next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) return res From 5d0c97541af417064e5e3fb4eeb5416a436b0475 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sat, 26 Oct 2013 20:38:54 +0200 Subject: [PATCH 186/264] [XHamsterIE] Extract SD and HD video --- youtube_dl/extractor/xhamster.py | 52 +++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 81c4be326..7444d3393 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -36,21 +36,25 @@ class XHamsterIE(InfoExtractor): }] def _real_extract(self,url): + def extract_video_url(webpage): + mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) + if mobj is None: + raise ExtractorError(u'Unable to extract media URL') + if len(mobj.group('server')) == 0: + return compat_urllib_parse.unquote(mobj.group('file')) + else: + 
return mobj.group('server')+'/key='+mobj.group('file') + + def is_hd(webpage): + return webpage.find('<div class=\'icon iconHD\'>') != -1 + mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') seo = mobj.group('seo') - mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo) + mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) webpage = self._download_webpage(mrss_url, video_id) - mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) - if mobj is None: - raise ExtractorError(u'Unable to extract media URL') - if len(mobj.group('server')) == 0: - video_url = compat_urllib_parse.unquote(mobj.group('file')) - else: - video_url = mobj.group('server')+'/key='+mobj.group('file') - video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com', webpage, u'title') @@ -76,14 +80,32 @@ class XHamsterIE(InfoExtractor): age_limit = self._rta_search(webpage) - return [{ - 'id': video_id, - 'url': video_url, - 'ext': determine_ext(video_url), - 'title': video_title, + video_url = extract_video_url(webpage) + hd = is_hd(webpage) + formats = [{ + 'url': video_url, + 'ext': determine_ext(video_url), + 'format': 'hd' if hd else 'sd', + 'format_id': 'hd' if hd else 'sd', + }] + if not hd: + webpage = self._download_webpage(mrss_url+'?hd', video_id) + if is_hd(webpage): + video_url = extract_video_url(webpage) + formats.append({ + 'url': video_url, + 'ext': determine_ext(video_url), + 'format': 'hd', + 'format_id': 'hd', + }) + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, 'description': video_description, 'upload_date': video_upload_date, 'uploader_id': video_uploader_id, 'thumbnail': video_thumbnail, 'age_limit': age_limit, - }] + } From 7df286540f893f7fbba07da8ba3b09dd7c9027c4 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 26 Oct 2013 21:57:10 +0200 Subject: [PATCH 187/264] [YouPornIE] Extract all encrypted links and remove doubles at the end --- 
youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/youporn.py | 78 +++++++++++---------------------- 2 files changed, 27 insertions(+), 53 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e2332f9b8..d4654cc05 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -462,7 +462,7 @@ class YoutubeDL(object): info_dict['playlist_index'] = None # This extractors handle format selection themselves - if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']: + if info_dict['extractor'] in [u'youtube', u'Youku', u'mixcloud']: if download: self.process_info(info_dict) return info_dict diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index e3b56cece..704ee89dc 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -31,20 +31,6 @@ class YouPornIE(InfoExtractor): } } - def _print_formats(self, formats): - """Print all available formats""" - print(u'Available formats:') - print(u'ext\t\tformat') - print(u'---------------------------------') - for format in formats: - print(u'%s\t\t%s' % (format['ext'], format['format'])) - - def _specific(self, req_format, formats): - for x in formats: - if x["format"] == req_format: - return x - return None - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') @@ -71,27 +57,22 @@ class YouPornIE(InfoExtractor): except KeyError: raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1]) - # Get all of the formats available + # Get all of the links from the page DOWNLOAD_LIST_RE = r'(?s)
    (?P.*?)
' download_list_html = self._search_regex(DOWNLOAD_LIST_RE, webpage, u'download list').strip() - - # Get all of the links from the page - LINK_RE = r'(?s)' + LINK_RE = r'' links = re.findall(LINK_RE, download_list_html) - - # Get link of hd video if available - mobj = re.search(r'var encryptedQuality720URL = \'(?P[a-zA-Z0-9+/]+={0,2})\';', webpage) - if mobj != None: - encrypted_video_url = mobj.group(u'encrypted_video_url') - video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8') - links = [video_url] + links + + # Get all encrypted links + encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage) + for encrypted_link in encrypted_links: + link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8') + links.append(link) if not links: raise ExtractorError(u'ERROR: no known formats available for video') - self.to_screen(u'Links found: %d' % len(links)) - formats = [] for link in links: @@ -103,39 +84,32 @@ class YouPornIE(InfoExtractor): path = compat_urllib_parse_urlparse( video_url ).path extension = os.path.splitext( path )[1][1:] format = path.split('/')[4].split('_')[:2] + # size = format[0] # bitrate = format[1] format = "-".join( format ) # title = u'%s-%s-%s' % (video_title, size, bitrate) formats.append({ - 'id': video_id, 'url': video_url, - 'uploader': video_uploader, - 'upload_date': upload_date, - 'title': video_title, 'ext': extension, 'format': format, - 'thumbnail': thumbnail, - 'description': video_description, - 'age_limit': age_limit, + 'format_id': format, }) - if self._downloader.params.get('listformats', None): - self._print_formats(formats) - return - - req_format = self._downloader.params.get('format', 'best') - self.to_screen(u'Format: %s' % req_format) - - if req_format is None or req_format == 'best': - return [formats[0]] - elif req_format == 'worst': - return [formats[-1]] - elif req_format in ('-1', 'all'): - return formats - else: - format = 
self._specific( req_format, formats ) - if format is None: - raise ExtractorError(u'Requested format not available') - return [format] + # Sort and remove doubles + formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) + for i in range(len(formats)-1,0,-1): + if formats[i]['format_id'] == formats[i-1]['format_id']: + del formats[i] + + return { + 'id': video_id, + 'uploader': video_uploader, + 'upload_date': upload_date, + 'title': video_title, + 'thumbnail': thumbnail, + 'description': video_description, + 'age_limit': age_limit, + 'formats': formats, + } From 1d45a23b745cdbb361dd5cef8f848f7ebcfa8f5a Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 26 Oct 2013 23:27:30 +0200 Subject: [PATCH 188/264] Add support for http://www.tube8.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tube8.py | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 youtube_dl/extractor/tube8.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db69af361..84fc2e4fa 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -121,6 +121,7 @@ from .tf1 import TF1IE from .thisav import ThisAVIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .tube8 import Tube8IE from .tudou import TudouIE from .tumblr import TumblrIE from .tutv import TutvIE diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py new file mode 100644 index 000000000..b7e7d984d --- /dev/null +++ b/youtube_dl/extractor/tube8.py @@ -0,0 +1,63 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, + unescapeHTML, +) +from ..aes import ( + aes_decrypt_text +) + +class Tube8IE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Ptube8.com/[^/]+/[^/]+/(?P[0-9]+)/?)' + _TEST = { + u'url': 
u'http://www.tube8.com/teen/kasia-music-video/229795/', + u'file': u'229795.mp4', + u'md5': u'e9e0b0c86734e5e3766e653509475db0', + u'info_dict': { + u"description": u"hot teen Kasia grinding", + u"uploader": u"unknown", + u"title": u"Kasia music video", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title') + video_description = self._html_search_regex(r'>Description:(.+?)<', webpage, u'description', fatal=False) + video_uploader = self._html_search_regex(r'>Submitted by:(?:\w|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False) + thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False) + if thumbnail: + thumbnail = thumbnail.replace('\\/', '/') + + video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url') + if webpage.find('"encrypted":true')!=-1: + password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password') + video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[4].split('_')[:2] + format = "-".join( format ) + + return { + 'id': video_id, + 'uploader': video_uploader, + 'title': video_title, + 'thumbnail': thumbnail, + 'description': video_description, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + } From 6e76104d66624a8f742d1e0d210a35452a79aec8 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 26 Oct 2013 23:33:32 +0200 Subject: [PATCH 189/264] [YouPornIE] Make webpage download more robust --- youtube_dl/extractor/youporn.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 704ee89dc..e46a9b4d6 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -17,7 +17,7 @@ from ..aes import ( ) class YouPornIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P[0-9]+)/(?P[^/]+)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' _TEST = { u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', u'file': u'505835.mp4', @@ -34,6 +34,7 @@ class YouPornIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') From 14e10b2b6ec0d1ac3af36cc0458673ec89a88f03 Mon Sep 17 00:00:00 2001 From: pyed <iAbdulElah@Gmail.com> Date: Sun, 27 Oct 2013 01:19:38 +0300 Subject: [PATCH 190/264] [addanime] try to download HQ before normal --- youtube_dl/extractor/addanime.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 82a785a19..adbda194a 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor): IE_NAME = u'AddAnime' _TEST = { u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', - u'file': u'24MR3YO5SAS9.flv', - u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1', + u'file': u'24MR3YO5SAS9.mp4', + u'md5': u'3f8e232ad52163c87fa23897e736cb2c', u'info_dict': { u"description": u"One Piece 606", u"title": u"One Piece 606" @@ -60,8 +60,12 @@ class AddAnimeIE(InfoExtractor): note=u'Confirming after redirect') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r"var normal_video_file = '(.*?)';", + video_url = self._search_regex(r"var 
hq_video_file = '(.*?)';", webpage, u'video file URL') + if not video_url: # if there's no hq_video_file, get normal_video_file + video_url = self._search_regex(r"var normal_video_file = '(.*?)';", + webpage, u'video file URL') + video_extension = video_url[-3:] # mp4 or flv ? video_title = self._og_search_title(webpage) video_description = self._og_search_description(webpage) @@ -69,7 +73,7 @@ class AddAnimeIE(InfoExtractor): '_type': 'video', 'id': video_id, 'url': video_url, - 'ext': 'flv', + 'ext': video_extension, 'title': video_title, 'description': video_description } From 8cb57d9b91cce72b522d89b5e3e469c433956a07 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeo@users.noreply.github.com> Date: Sun, 27 Oct 2013 00:21:27 +0200 Subject: [PATCH 191/264] [Tube8IE] Escape dot in regex --- youtube_dl/extractor/tube8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index b7e7d984d..ef8d21642 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -13,7 +13,7 @@ from ..aes import ( ) class Tube8IE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)' + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)' _TEST = { u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/', u'file': u'229795.mp4', From 125cfd78e8579b1c6104d3ec2359417677863a8a Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sun, 27 Oct 2013 01:04:22 +0200 Subject: [PATCH 192/264] Add support for http://www.pornhub.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pornhub.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/pornhub.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db69af361..2a5518665 100644 --- a/youtube_dl/extractor/__init__.py +++ 
b/youtube_dl/extractor/__init__.py @@ -94,6 +94,7 @@ from .ooyala import OoyalaIE from .orf import ORFIE from .pbs import PBSIE from .photobucket import PhotobucketIE +from .pornhub import PornHubIE from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py new file mode 100644 index 000000000..3dbd2ab69 --- /dev/null +++ b/youtube_dl/extractor/pornhub.py @@ -0,0 +1,67 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, + unescapeHTML, +) +from ..aes import ( + aes_decrypt_text +) + +class PornHubIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))' + _TEST = { + u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015', + u'file': u'648719015.mp4', + u'md5': u'882f488fa1f0026f023f33576004a2ed', + u'info_dict': { + u"uploader": u"BABES-COM", + u"title": u"Seductive Indian beauty strips down and fingers her pink pussy", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' 
+ mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, u'title') + video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False) + thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False) + if thumbnail: + thumbnail = compat_urllib_parse.unquote(thumbnail) + + video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) + if webpage.find('"encrypted":true') != -1: + password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ') + video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) + + formats = [] + for video_url in video_urls: + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + formats.append({ + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + }) + formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) + + return { + 'id': video_id, + 'uploader': video_uploader, + 'title': video_title, + 'thumbnail': thumbnail, + 'formats': formats, + } From 71865091abbb0166edeffff14da019542260557f Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeo@users.noreply.github.com> Date: Sun, 27 Oct 2013 01:08:03 +0200 Subject: [PATCH 193/264] [Tube8IE] Fix regex for uploader extraction --- youtube_dl/extractor/tube8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index ef8d21642..ebc8c1f4f 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -36,7 +36,7 @@ class Tube8IE(InfoExtractor): 
video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title') video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False) - video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\w|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False) + video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False) if thumbnail: thumbnail = thumbnail.replace('\\/', '/') From 7b2212e954a3f2ecf1c0936d7c5b90a43fa380cd Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sun, 27 Oct 2013 01:59:26 +0200 Subject: [PATCH 194/264] Add support for http://www.spankwire.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/spankwire.py | 70 +++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 youtube_dl/extractor/spankwire.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db69af361..7a60e0937 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -109,6 +109,7 @@ from .slideshare import SlideshareIE from .sohu import SohuIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .southparkstudios import SouthParkStudiosIE +from .spankwire import SpankwireIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE from .statigram import StatigramIE diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py new file mode 100644 index 000000000..f0d5009c7 --- /dev/null +++ b/youtube_dl/extractor/spankwire.py @@ -0,0 +1,70 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, + unescapeHTML, +) +from ..aes import ( + aes_decrypt_text +) + 
+class SpankwireIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)' + _TEST = { + u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', + u'file': u'103545.mp4', + u'md5': u'1b3f55e345500552dbc252a3e9c1af43', + u'info_dict': { + u"uploader": u"oreusz", + u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch", + u"description": u"Crazy Bitch X rated music video.", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title') + video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False) + thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False) + description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False) + if len(description) == 0: + description = None + + video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) + if webpage.find('flashvars\.encrypted = "true"') != -1: + password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ') + video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) + + formats = [] + for video_url in video_urls: + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[4].split('_')[:2] + format = "-".join( format ) + formats.append({ + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + }) + formats.sort(key=lambda format: 
list(map(lambda s: s.zfill(6), format['format'].split('-')))) + + return { + 'id': video_id, + 'uploader': video_uploader, + 'title': video_title, + 'thumbnail': thumbnail, + 'description': description, + 'formats': formats, + } From 5b11143d05c6d38cf1df94561c2a515c9150b2e1 Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sun, 27 Oct 2013 10:10:28 +0100 Subject: [PATCH 195/264] Add support for http://www.keezmovies.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/keezmovies.py | 58 ++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/keezmovies.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index db69af361..d4ad4e37c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -72,6 +72,7 @@ from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .kankan import KankanIE +from .keezmovies import KeezMoviesIE from .kickstarter import KickStarterIE from .keek import KeekIE from .liveleak import LiveLeakIE diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py new file mode 100644 index 000000000..937caf664 --- /dev/null +++ b/youtube_dl/extractor/keezmovies.py @@ -0,0 +1,58 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, + unescapeHTML, +) +from ..aes import ( + aes_decrypt_text +) + +class KeezMoviesIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))' + _TEST = { + u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', + u'file': u'1214711.mp4', + u'md5': u'6e297b7e789329923fcf83abb67c9289', + u'info_dict': { + u"title": u"Petite Asian Lady Mai Playing In Bathtub", + } + } + + def _real_extract(self, url): + mobj = 
re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + # embedded video + mobj = re.search(r'href="([^"]+)"></iframe>', webpage) + if mobj: + embedded_url = mobj.group(1) + return self.playlist_result([self.url_result(embedded_url)], playlist_id=video_id) + + video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title') + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + if webpage.find('encrypted=true')!=-1: + password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password') + video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[4].split('_')[:2] + format = "-".join( format ) + + return { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + } From aee5e18c8f4c4360216ab27a2b1362a2ce24881e Mon Sep 17 00:00:00 2001 From: Abdulelah Alfntokh <iAbdulelah@Gmail.com> Date: Sun, 27 Oct 2013 13:36:43 +0300 Subject: [PATCH 196/264] [addanime] catch 'RegexNotFoundError' --- youtube_dl/extractor/addanime.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index adbda194a..45aac15c3 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -8,6 +8,7 @@ from ..utils import ( compat_urllib_parse_urlparse, ExtractorError, + RegexNotFoundError, ) @@ -60,11 +61,13 @@ class AddAnimeIE(InfoExtractor): note=u'Confirming after redirect') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r"var hq_video_file = '(.*?)';", - webpage, 
u'video file URL') - if not video_url: # if there's no hq_video_file, get normal_video_file + try: + video_url = self._search_regex(r"var hq_video_file = '(.*?)';", + webpage, u'video file URL') + except RegexNotFoundError: video_url = self._search_regex(r"var normal_video_file = '(.*?)';", webpage, u'video file URL') + video_extension = video_url[-3:] # mp4 or flv ? video_title = self._og_search_title(webpage) video_description = self._og_search_description(webpage) From 3e6a330d38e2bfce12a789c0f51c7d9754f4316e Mon Sep 17 00:00:00 2001 From: Abdulelah Alfntokh <iAbdulelah@Gmail.com> Date: Sun, 27 Oct 2013 13:51:26 +0300 Subject: [PATCH 197/264] [addanime] fix md5sum --- youtube_dl/extractor/addanime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 45aac15c3..490b5af62 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -19,7 +19,7 @@ class AddAnimeIE(InfoExtractor): _TEST = { u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', u'file': u'24MR3YO5SAS9.mp4', - u'md5': u'3f8e232ad52163c87fa23897e736cb2c', + u'md5': u'72954ea10bc979ab5e2eb288b21425a0', u'info_dict': { u"description": u"One Piece 606", u"title": u"One Piece 606" From 67874aeffa37a114b01fe6be11d156b7ece584b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 12:07:58 +0100 Subject: [PATCH 198/264] [facebook] Fix the login process (fixes #1244) --- youtube_dl/extractor/facebook.py | 63 ++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 9d1bc0751..62881da31 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -19,7 +19,8 @@ class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" _VALID_URL = 
r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' - _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' + _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' + _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _NETRC_MACHINE = 'facebook' IE_NAME = u'facebook' _TEST = { @@ -36,50 +37,56 @@ class FacebookIE(InfoExtractor): """Report attempt to log in.""" self.to_screen(u'Logging in') - def _real_initialize(self): - if self._downloader is None: - return - - useremail = None - password = None - downloader_params = self._downloader.params - - # Attempt to use provided username and password or .netrc data - if downloader_params.get('username', None) is not None: - useremail = downloader_params['username'] - password = downloader_params['password'] - elif downloader_params.get('usenetrc', False): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - useremail = info[0] - password = info[2] - else: - raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) - return - + def _login(self): + (useremail, password) = self._get_login_info() if useremail is None: return - # Log in + login_page_req = compat_urllib_request.Request(self._LOGIN_URL) + login_page_req.add_header('Cookie', 'locale=en_US') + self.report_login() + login_page = self._download_webpage(login_page_req, None, note=False, + errnote=u'Unable to download login page') + lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd') + lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd') + login_form = { 'email': useremail, 'pass': password, - 'login': 'Log+In' + 'lsd': lsd, + 'lgnrnd': 
lgnrnd, + 'next': 'http://facebook.com/home.php', + 'default_persistent': '0', + 'legacy_return': '1', + 'timezone': '-60', + 'trynum': '1', } request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') try: - self.report_login() login_results = compat_urllib_request.urlopen(request).read() if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') return + + check_form = { + 'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'), + 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'), + 'name_action_selected': 'dont_save', + 'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'), + } + check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form)) + check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + check_response = compat_urllib_request.urlopen(check_req).read() + if re.search(r'id="checkpointSubmitButton"', check_response) is not None: + self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) return + def _real_initialize(self): + self._login() + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: From 6f71ef580c0d93947817c81a09f6a188631585a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 12:09:46 +0100 Subject: [PATCH 199/264] [facebook] Report a more 
meaningful message if the video cannot be accessed (closes #1658) --- youtube_dl/extractor/facebook.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 62881da31..aa2525f17 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -100,7 +100,12 @@ class FacebookIE(InfoExtractor): AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) if not m: - raise ExtractorError(u'Cannot parse data') + m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) + if m_msg is not None: + err_msg = u'The video is not available, Facebook said: "%s"' % m_msg.group(1) + else: + err_msg = u'Cannot parse data' + raise ExtractorError(err_msg) data = dict(json.loads(m.group(1))) params_raw = compat_urllib_parse.unquote(data['params']) params = json.loads(params_raw) From 749a4fd2fd88017bafca5c298f16123fd0146b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 12:13:55 +0100 Subject: [PATCH 200/264] [facebook] Don't recommend to report the issue if the video is private. 
--- youtube_dl/extractor/facebook.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index aa2525f17..f8bdfc2d3 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -102,10 +102,11 @@ class FacebookIE(InfoExtractor): if not m: m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) if m_msg is not None: - err_msg = u'The video is not available, Facebook said: "%s"' % m_msg.group(1) + raise ExtractorError( + u'The video is not available, Facebook said: "%s"' % m_msg.group(1), + expected=True) else: - err_msg = u'Cannot parse data' - raise ExtractorError(err_msg) + raise ExtractorError(u'Cannot parse data') data = dict(json.loads(m.group(1))) params_raw = compat_urllib_parse.unquote(data['params']) params = json.loads(params_raw) From 5da054958151263040f2a53cf554b0084e79f6fa Mon Sep 17 00:00:00 2001 From: rzhxeo <rzhxeot7z81b4700@mailcatch.com> Date: Sun, 27 Oct 2013 12:48:09 +0100 Subject: [PATCH 201/264] [KeezMoviesIE] Correct return value for embedded videos --- youtube_dl/extractor/keezmovies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 937caf664..23d5209d9 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -36,7 +36,7 @@ class KeezMoviesIE(InfoExtractor): mobj = re.search(r'href="([^"]+)"></iframe>', webpage) if mobj: embedded_url = mobj.group(1) - return self.playlist_result([self.url_result(embedded_url)], playlist_id=video_id) + return self.url_result(embedded_url) video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title') video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) From af4d506eb35a257e91098fa92498b24ef5de14c6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 14:18:55 +0100 Subject: [PATCH 202/264] [faz] Use a regex for getting the description The page cannot be parsed in python2.6 with the html parser. --- youtube_dl/extractor/faz.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index deaa4ed2d..89ed08db4 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -5,8 +5,6 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( determine_ext, - clean_html, - get_element_by_attribute, ) @@ -47,12 +45,12 @@ class FazIE(InfoExtractor): 'format_id': code.lower(), }) - descr_html = get_element_by_attribute('class', 'Content Copy', webpage) + descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') info = { 'id': video_id, 'title': self._og_search_title(webpage), 'formats': formats, - 'description': clean_html(descr_html), + 'description': descr, 'thumbnail': config.find('STILL/STILL_BIG').text, } # TODO: Remove when #980 has been merged From aa929c37d58163cc13184b6922ebc9ceb4625239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 14:21:37 +0100 Subject: [PATCH 203/264] [generic] Fix test video's checksum --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 69e0a7bd2..ab4a5b7de 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -25,7 +25,7 @@ class GenericIE(InfoExtractor): { u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', u'file': u'13601338388002.mp4', - u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', + u'md5': u'6e15c93721d7ec9e9ca3fdbf07982cfd', u'info_dict': { u"uploader": 
u"www.hodiho.fr", u"title": u"R\u00e9gis plante sa Jeep" From bc63d9d3294072e2b355c3363c0fb5c33756d3af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 14:26:19 +0100 Subject: [PATCH 204/264] [rtlnow] Change the test for rtlnitronow --- youtube_dl/extractor/rtlnow.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index d1b08c9bc..9ac7c3be8 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -63,13 +63,12 @@ class RTLnowIE(InfoExtractor): }, }, { - u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1', - u'file': u'127367.flv', + u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1', + u'file': u'129679.flv', u'info_dict': { - u'upload_date': u'20130926', - u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...', - u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin', - u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg', + u'upload_date': u'20131016', + u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...', + u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig', }, u'params': { u'skip_download': True, From c19f7764a5499b0f1e1914dd5101619b8d57d7cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 27 Oct 2013 14:40:25 +0100 Subject: [PATCH 205/264] [generic] Detect bandcamp pages that use custom domains (closes #1662) They embed the original url in the 'og:url' property. 
--- youtube_dl/extractor/generic.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ab4a5b7de..2c8fcf5ae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -41,7 +41,17 @@ class GenericIE(InfoExtractor): u"uploader_id": u"skillsmatter", u"uploader": u"Skills Matter", } - } + }, + # bandcamp page with custom domain + { + u'url': u'http://bronyrock.com/track/the-pony-mash', + u'file': u'3235767654.mp3', + u'info_dict': { + u'title': u'The Pony Mash', + u'uploader': u'M_Pallante', + }, + u'skip': u'There is a limit of 200 free downloads / month for the test song', + }, ] def report_download_webpage(self, video_id): @@ -155,6 +165,12 @@ class GenericIE(InfoExtractor): surl = unescapeHTML(mobj.group(1)) return self.url_result(surl, 'Youtube') + # Look for Bandcamp pages with custom domain + mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) + if mobj is not None: + burl = unescapeHTML(mobj.group(1)) + return self.url_result(burl, 'Bandcamp') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: From 198e370f23d9e97b335d1c2603b9fc624817b701 Mon Sep 17 00:00:00 2001 From: Abdulelah Alfntokh <iAbdulelah@Gmail.com> Date: Sun, 27 Oct 2013 19:48:02 +0300 Subject: [PATCH 206/264] [addanime] better regex. 
--- youtube_dl/extractor/addanime.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 490b5af62..465df8cf0 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -8,7 +8,6 @@ from ..utils import ( compat_urllib_parse_urlparse, ExtractorError, - RegexNotFoundError, ) @@ -61,12 +60,8 @@ class AddAnimeIE(InfoExtractor): note=u'Confirming after redirect') webpage = self._download_webpage(url, video_id) - try: - video_url = self._search_regex(r"var hq_video_file = '(.*?)';", - webpage, u'video file URL') - except RegexNotFoundError: - video_url = self._search_regex(r"var normal_video_file = '(.*?)';", - webpage, u'video file URL') + video_url = self._search_regex(r"var (?:hq|normal)_video_file = '(.*?)';", + webpage, u'video file URL') video_extension = video_url[-3:] # mp4 or flv ? video_title = self._og_search_title(webpage) From 7d8c2e07f218dc33aefb77db78fa420becb53732 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Mon, 28 Oct 2013 00:33:43 -0400 Subject: [PATCH 207/264] [Exfm] replace the failing Soundcloud test vector (broken also in browser) --- youtube_dl/extractor/exfm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index 3443f19c5..c74556579 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -11,14 +11,14 @@ class ExfmIE(InfoExtractor): _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' _TESTS = [ { - u'url': u'http://ex.fm/song/1bgtzg', - u'file': u'95223130.mp3', - u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', + u'url': u'http://ex.fm/song/eh359', + u'file': u'44216187.mp3', + u'md5': u'e45513df5631e6d760970b14cc0c11e7', u'info_dict': { - u"title": u"We Can't Stop - Miley Cyrus", - u"uploader": u"Miley Cyrus", - u'upload_date': 
u'20130603', - u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC', + u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive", + u"uploader": u"deadjournalist", + u'upload_date': u'20120424', + u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', }, u'note': u'Soundcloud song', }, From 750e9833b83c6e17a4efa8d5dac5b3cd848f4603 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda <filippo.valsorda@gmail.com> Date: Mon, 28 Oct 2013 01:50:17 -0400 Subject: [PATCH 208/264] Add the missing age_limit tags; added a devscript to do a superficial check for porn sites without the age_limit tag in the test --- devscripts/check-porn.py | 39 ++++++++++++++++++++++++++++++ youtube_dl/extractor/keezmovies.py | 5 +++- youtube_dl/extractor/pornhub.py | 2 ++ youtube_dl/extractor/pornotube.py | 3 ++- youtube_dl/extractor/spankwire.py | 4 +++ youtube_dl/extractor/tube8.py | 2 ++ youtube_dl/extractor/youjizz.py | 8 ++++-- 7 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 devscripts/check-porn.py diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py new file mode 100644 index 000000000..63401fe18 --- /dev/null +++ b/devscripts/check-porn.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +""" +This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check +if we are not 'age_limit' tagging some porn site +""" + +# Allow direct execution +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import get_testcases +from youtube_dl.utils import compat_urllib_request + +for test in get_testcases(): + try: + webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() + except: + print('\nFail: {0}'.format(test['name'])) + continue + + webpage = webpage.decode('utf8', 'replace') + + if 'porn' in webpage.lower() and 
('info_dict' not in test + or 'age_limit' not in test['info_dict'] + or test['info_dict']['age_limit'] != 18): + print('\nPotential missing age_limit check: {0}'.format(test['name'])) + + elif 'porn' not in webpage.lower() and ('info_dict' in test and + 'age_limit' in test['info_dict'] and + test['info_dict']['age_limit'] == 18): + print('\nPotential false negative: {0}'.format(test['name'])) + + else: + sys.stdout.write('.') + sys.stdout.flush() + +print() diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 23d5209d9..5e05900da 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -6,7 +6,6 @@ from ..utils import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, - unescapeHTML, ) from ..aes import ( aes_decrypt_text @@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor): u'md5': u'6e297b7e789329923fcf83abb67c9289', u'info_dict': { u"title": u"Petite Asian Lady Mai Playing In Bathtub", + u"age_limit": 18, } } @@ -48,6 +48,8 @@ class KeezMoviesIE(InfoExtractor): format = path.split('/')[4].split('_')[:2] format = "-".join( format ) + age_limit = self._rta_search(webpage) + return { 'id': video_id, 'title': video_title, @@ -55,4 +57,5 @@ class KeezMoviesIE(InfoExtractor): 'ext': extension, 'format': format, 'format_id': format, + 'age_limit': age_limit, } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3dbd2ab69..5e2454f1b 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor): u'info_dict': { u"uploader": u"BABES-COM", u"title": u"Seductive Indian beauty strips down and fingers her pink pussy", + u"age_limit": 18 } } @@ -64,4 +65,5 @@ class PornHubIE(InfoExtractor): 'title': video_title, 'thumbnail': thumbnail, 'formats': formats, + 'age_limit': 18, } diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 
5d770ec28..35dc5a9ff 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor): u'md5': u'374dd6dcedd24234453b295209aa69b6', u'info_dict': { u"upload_date": u"20090708", - u"title": u"Marilyn-Monroe-Bathing" + u"title": u"Marilyn-Monroe-Bathing", + u"age_limit": 18 } } diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index f0d5009c7..32df0a7fb 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor): u"uploader": u"oreusz", u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch", u"description": u"Crazy Bitch X rated music video.", + u"age_limit": 18, } } @@ -60,6 +61,8 @@ class SpankwireIE(InfoExtractor): }) formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) + age_limit = self._rta_search(webpage) + return { 'id': video_id, 'uploader': video_uploader, @@ -67,4 +70,5 @@ class SpankwireIE(InfoExtractor): 'thumbnail': thumbnail, 'description': description, 'formats': formats, + 'age_limit': age_limit, } diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index ebc8c1f4f..aea9d9a24 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor): u"description": u"hot teen Kasia grinding", u"uploader": u"unknown", u"title": u"Kasia music video", + u"age_limit": 18, } } @@ -60,4 +61,5 @@ class Tube8IE(InfoExtractor): 'ext': extension, 'format': format, 'format_id': format, + 'age_limit': 18, } diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 1265639e8..1fcc518ac 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor): u'file': u'2189178.flv', u'md5': u'07e15fa469ba384c7693fd246905547c', u'info_dict': { - u"title": u"Zeichentrick 1" + 
u"title": u"Zeichentrick 1", + u"age_limit": 18, } } @@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor): # Get webpage content webpage = self._download_webpage(url, video_id) + age_limit = self._rta_search(webpage) + # Get the video title video_title = self._html_search_regex(r'<title>(?P<title>.*)', webpage, u'title').strip() @@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor): 'title': video_title, 'ext': 'flv', 'format': 'flv', - 'player_url': embed_page_url} + 'player_url': embed_page_url, + 'age_limit': age_limit} return [info] From 8ffa13e03e995f2009d8240cbdc6ba7aba9d3759 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 28 Oct 2013 02:34:29 -0400 Subject: [PATCH 209/264] [Instagram] get the non-https link, as they are serving Akamai cert from a instagram.com domain --- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/extractor/instagram.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index aaa5c24c8..8b067b48d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -318,10 +318,10 @@ class InfoExtractor(object): def _og_search_title(self, html, **kargs): return self._og_search_property('title', html, **kargs) - def _og_search_video_url(self, html, name='video url', **kargs): - return self._html_search_regex([self._og_regex('video:secure_url'), - self._og_regex('video')], - html, name, **kargs) + def _og_search_video_url(self, html, name='video url', secure=True, **kargs): + regexes = [self._og_regex('video')] + if secure: regexes.insert(0, self._og_regex('video:secure_url')) + return self._html_search_regex(regexes, html, name, **kargs) def _rta_search(self, html): # See http://www.rtalabel.org/index.php?content=howtofaq#single diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index ddc42882a..213aac428 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -26,7 
+26,7 @@ class InstagramIE(InfoExtractor): return [{ 'id': video_id, - 'url': self._og_search_video_url(webpage), + 'url': self._og_search_video_url(webpage, secure=False), 'ext': 'mp4', 'title': u'Video by %s' % uploader_id, 'thumbnail': self._og_search_thumbnail(webpage), From d41e6efc852c34da582790a54ecc4f5e9dbbedda Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 10:44:02 +0100 Subject: [PATCH 210/264] New debug option --write-pages --- youtube_dl/__init__.py | 4 ++++ youtube_dl/extractor/common.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a33dec785..48ffcbf8e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -316,6 +316,9 @@ def parseOpts(overrideArguments=None): verbosity.add_option('--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, help='print downloaded pages to debug problems(very verbose)') + verbosity.add_option('--write-pages', + action='store_true', dest='write_pages', default=False, + help='Write downloaded pages to files in the current directory') verbosity.add_option('--youtube-print-sig-code', action='store_true', dest='youtube_print_sig_code', default=False, help=optparse.SUPPRESS_HELP) @@ -652,6 +655,7 @@ def _real_main(argv=None): 'prefer_free_formats': opts.prefer_free_formats, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, + 'write_pages': opts.write_pages, 'test': opts.test, 'keepvideo': opts.keepvideo, 'min_filesize': opts.min_filesize, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index aaa5c24c8..458635f1e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -15,6 +15,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, RegexNotFoundError, + sanitize_filename, unescapeHTML, ) @@ -182,6 +183,17 @@ class InfoExtractor(object): self.to_screen(u'Dumping request to ' + url) 
dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) + if self._downloader.params.get('write_pages', False): + try: + url = url_or_request.get_full_url() + except AttributeError: + url = url_or_request + raw_filename = ('%s_%s.dump' % (video_id, url)) + filename = sanitize_filename(raw_filename, restricted=True) + self.to_screen(u'Saving request to ' + filename) + with open(filename, 'wb') as outf: + outf.write(webpage_bytes) + content = webpage_bytes.decode(encoding, 'replace') return (content, urlh) From 77d0a82fefd8ad7a2ab0662739aa2f039bed11ca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 11:24:47 +0100 Subject: [PATCH 211/264] [addanime] Use new formats system --- youtube_dl/extractor/addanime.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 465df8cf0..3b8258ad8 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor): video_id = mobj.group('video_id') webpage = self._download_webpage(url, video_id) except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError): + if not isinstance(ee.cause, compat_HTTPError) or \ + ee.cause.code != 503: raise redir_webpage = ee.cause.read().decode('utf-8') @@ -60,18 +61,27 @@ class AddAnimeIE(InfoExtractor): note=u'Confirming after redirect') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r"var (?:hq|normal)_video_file = '(.*?)';", - webpage, u'video file URL') - - video_extension = video_url[-3:] # mp4 or flv ? 
+ formats = [] + for format_id in ('normal', 'hq'): + rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) + video_url = self._search_regex(rex, webpage, u'video file URLx', + fatal=False) + if not video_url: + continue + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'ext': video_url[-3:], + }) + if not formats: + raise ExtractorError(u'Cannot find any video format!') video_title = self._og_search_title(webpage) video_description = self._og_search_description(webpage) return { '_type': 'video', 'id': video_id, - 'url': video_url, - 'ext': video_extension, + 'formats': formats, 'title': video_title, 'description': video_description } From c1002e96e98f4851aed5de0142e8e2bd1ac4661c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 11:28:02 +0100 Subject: [PATCH 212/264] Let extractors omit ext in formats --- youtube_dl/YoutubeDL.py | 3 +++ youtube_dl/extractor/addanime.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d4654cc05..b09eeff32 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -484,6 +484,9 @@ class YoutubeDL(object): res=self.format_resolution(format), note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', ) + # Automatically determine file extension if missing + if 'ext' not in format: + format['ext'] = determine_ext(format['url']) if self.params.get('listformats', None): self.list_formats(info_dict) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 3b8258ad8..b99d4b966 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -71,7 +71,6 @@ class AddAnimeIE(InfoExtractor): formats.append({ 'format_id': format_id, 'url': video_url, - 'ext': video_url[-3:], }) if not formats: raise ExtractorError(u'Cannot find any video format!') From 8abeeb94490e7066826ac086554be935a0c1dd94 Mon Sep 17 00:00:00 2001 From: 
Philipp Hagemeister Date: Mon, 28 Oct 2013 11:31:12 +0100 Subject: [PATCH 213/264] Nicer --list-formats output --- youtube_dl/YoutubeDL.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b09eeff32..12621ff95 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -757,23 +757,23 @@ class YoutubeDL(object): archive_file.write(vid_id + u'\n') @staticmethod - def format_resolution(format): + def format_resolution(format, default='unknown'): if format.get('height') is not None: if format.get('width') is not None: res = u'%sx%s' % (format['width'], format['height']) else: res = u'%sp' % format['height'] else: - res = '???' + res = default return res def list_formats(self, info_dict): formats_s = [] for format in info_dict.get('formats', [info_dict]): - formats_s.append(u'%-15s: %-5s %-15s[%s]' % ( + formats_s.append(u'%-15s%-7s %-15s%s' % ( format['format_id'], format['ext'], - format.get('format_note') or '-', + format.get('format_note', ''), self.format_resolution(format), ) ) From 1003d108d51b7eb5edb84778ec234b217d72d4a5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 11:32:22 +0100 Subject: [PATCH 214/264] [vimeo] Support hash in URL (Fixes #1669) --- youtube_dl/extractor/vimeo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ef90fecc0..b4dbcd2ee 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,12 +20,12 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?$' + _VALID_URL = 
r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ { - u'url': u'http://vimeo.com/56015672', + u'url': u'http://vimeo.com/56015672#at=0', u'file': u'56015672.mp4', u'md5': u'ae7a1d8b183758a0506b0622f37dfa14', u'info_dict': { From f088ea54863f17cad7d50d73b49042e18092de3e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 11:34:21 +0100 Subject: [PATCH 215/264] release 2013.10.28 --- README.md | 23 +++++++++++++---------- youtube_dl/version.py | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2b8db0cfc..a2b296613 100644 --- a/README.md +++ b/README.md @@ -79,16 +79,17 @@ which means you can modify it, redistribute it or use it however you like. different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for the format description - (like "22 - 1280x720" or "HD")%(upload_date)s for - the upload date (YYYYMMDD), %(extractor)s for the - provider (youtube, metacafe, etc), %(id)s for the - video id , %(playlist)s for the playlist the - video is in, %(playlist_index)s for the position - in the playlist and %% for a literal percent. Use - - to output to stdout. Can also be used to - download to a different directory, for example - with -o '/my/downloads/%(uploader)s/%(title)s-%(i - d)s.%(ext)s' . + (like "22 - 1280x720" or "HD"),%(format_id)s for + the unique id of the format (like Youtube's + itags: "137"),%(upload_date)s for the upload date + (YYYYMMDD), %(extractor)s for the provider + (youtube, metacafe, etc), %(id)s for the video id + , %(playlist)s for the playlist the video is in, + %(playlist_index)s for the position in the + playlist and %% for a literal percent. Use - to + output to stdout. 
Can also be used to download to + a different directory, for example with -o '/my/d + ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given @@ -126,6 +127,8 @@ which means you can modify it, redistribute it or use it however you like. -v, --verbose print various debugging information --dump-intermediate-pages print downloaded pages to debug problems(very verbose) + --write-pages Write downloaded pages to files in the current + directory ## Video Format Options: -f, --format FORMAT video format code, specifiy the order of diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b4ce6068f..048afc8e7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.23.2' +__version__ = '2013.10.28' From a7685f3bf4275bfc0f390146e4ac99139d5b96b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 11:41:32 +0100 Subject: [PATCH 216/264] mixcloud does not do any format selection --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 12621ff95..2a779373a 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -462,7 +462,7 @@ class YoutubeDL(object): info_dict['playlist_index'] = None # This extractors handle format selection themselves - if info_dict['extractor'] in [u'youtube', u'Youku', u'mixcloud']: + if info_dict['extractor'] in [u'youtube', u'Youku']: if download: self.process_info(info_dict) return info_dict From 78a3a9f89ef4a9918c0e6dc854b99df9c2a94e4e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 11:41:43 +0100 Subject: [PATCH 217/264] Make "requested format not available" expected (#1655) --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 2a779373a..19dabef2d 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -524,7 +524,8 @@ class YoutubeDL(object): formats_to_download = [selected_format] break if not formats_to_download: - raise ExtractorError(u'requested format not available') + raise ExtractorError(u'requested format not available', + expected=True) if download: if len(formats_to_download) > 1: From 216d71d001989725b402a7ebee4715541314fd61 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 16:28:35 +0100 Subject: [PATCH 218/264] Check if description and thumbnail are None to prevent crash --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 19dabef2d..313295839 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -575,9 +575,9 @@ class YoutubeDL(object): if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath compat_print(info_dict['url'] + info_dict.get('play_path', u'')) - if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: compat_print(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and 'description' in info_dict: + if self.params.get('forcedescription', False) and info_dict.get('description') is not None: compat_print(info_dict['description']) if self.params.get('forcefilename', False) and filename is not None: compat_print(filename) From 369a759acc9d12590355c6d9f96ef7852153570f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 16:54:38 +0100 Subject: [PATCH 219/264] setup.py: Make sure the setuptools_available variable is set Otherwise it would crash if it can't import setuptools. 
--- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f14f96377..aa7cfca08 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ try: setuptools_available = True except ImportError: from distutils.core import setup + setuptools_available = False try: # This will create an exe that needs Microsoft Visual C++ 2008 From 32a35e441874ad9daba10c29a6a33f13a4953fbb Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 17:35:01 +0100 Subject: [PATCH 220/264] Add support for http://www.extremetube.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/extremetube.py | 52 +++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/extremetube.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..5eed1eebd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -39,6 +39,7 @@ from .ehow import EHowIE from .eighttracks import EightTracksIE from .escapist import EscapistIE from .exfm import ExfmIE +from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fktv import ( diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py new file mode 100644 index 000000000..981de430d --- /dev/null +++ b/youtube_dl/extractor/extremetube.py @@ -0,0 +1,52 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class ExtremeTubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pextremetube\.com/video/.+?(?P[0-9]+))(?:[/?&]|$)' + _TEST = { + u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', + u'file': u'652431.mp4', + u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', + u'info_dict': { + u"title": u"Music Video 14 british euro brit european cumshots swallow", + 
u"uploader": u"unknown", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'

]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') + uploader = self._html_search_regex(r'>Posted by:(?=<)(\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': uploader, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } From 77ae65877e7b4b71d446ea928fd14f973826f07b Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 18:18:58 +0100 Subject: [PATCH 221/264] Add support for http://www.mofosex.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mofosex.py | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/mofosex.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..045d4447a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -81,6 +81,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE from .mixcloud import MixcloudIE +from .mofosex import MofosexIE from .mtv import MTVIE from .muzu import MuzuTVIE from .myspass import MySpassIE diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py new file mode 100644 index 000000000..a0c926cd1 --- /dev/null +++ b/youtube_dl/extractor/mofosex.py @@ -0,0 +1,49 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class MofosexIE(InfoExtractor): + _VALID_URL = 
r'^(?:https?://)?(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' + _TEST = { + u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', + u'file': u'5018.mp4', + u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a', + u'info_dict': { + u"title": u"Japanese Teen Music Video", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'

(.+?)<', webpage, u'title') + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url')) + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } From 2bc67c35acece68a75284b88fcb03d69f267a63c Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 18:22:55 +0100 Subject: [PATCH 222/264] [KeezMoviesIE] Detect URLs with numbers in the SEO part correct --- youtube_dl/extractor/keezmovies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 5e05900da..786924445 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -12,7 +12,7 @@ from ..aes import ( ) class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pkeezmovies\.com/video/.+?(?P[0-9]+))' + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pkeezmovies\.com/video/.+?(?P[0-9]+))(?:[/?&]|$)' _TEST = { u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', u'file': u'1214711.mp4', From dcc2a706ef7df65839aa40ce5fda61f8cea36645 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 19:23:48 +0100 Subject: [PATCH 223/264] Add support for http://www.xtube.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/xtube.py | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/xtube.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..7efd097e4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ 
-149,6 +149,7 @@ from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE from .xvideos import XVideosIE +from .xtube import XTubeIE from .yahoo import YahooIE, YahooSearchIE from .youjizz import YouJizzIE from .youku import YoukuIE diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py new file mode 100644 index 000000000..7d06a7021 --- /dev/null +++ b/youtube_dl/extractor/xtube.py @@ -0,0 +1,54 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class XTubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' + _TEST = { + u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_', + u'file': u'kVTUy_G222_.mp4', + u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab', + u'info_dict': { + u"title": u"strange erotica", + u"uploader": u"greenshowers", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'
([^<]+)', webpage, u'description', default=None) + video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/') + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format[0] += 'p' + format[1] += 'k' + format = "-".join( format ) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': video_uploader, + 'description': video_description, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': 18, + } From 702665c0854af6fb317600c4825c0b00e2a4c981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 22:01:37 +0100 Subject: [PATCH 224/264] tests: build the filename from the info_dict if the 'file' key is missing It will need to have the 'id' and 'ext' keys to work. --- test/test_download.py | 39 +++++++++++++++++++++++---------------- youtube_dl/YoutubeDL.py | 2 +- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index b9a9be11d..f136176b1 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -60,9 +60,12 @@ def generator(test_case): if not ie._WORKING: print_skipping('IE marked as not _WORKING') return - if 'playlist' not in test_case and not test_case['file']: - print_skipping('No output file specified') - return + if 'playlist' not in test_case: + info_dict = test_case.get('info_dict', {}) + if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): + print_skipping('The output file cannot be know, the "file" ' + 'key is missing or the info_dict is incomplete') + return if 'skip' in test_case: print_skipping(test_case['skip']) return @@ -77,11 +80,17 @@ def generator(test_case): finished_hook_called.add(status['filename']) ydl.fd.add_progress_hook(_hook) + def get_tc_filename(tc): + return tc.get('file') or 
ydl.prepare_filename(tc.get('info_dict', {})) + test_cases = test_case.get('playlist', [test_case]) - for tc in test_cases: - try_rm(tc['file']) - try_rm(tc['file'] + '.part') - try_rm(tc['file'] + '.info.json') + def try_rm_tcs_files(): + for tc in test_cases: + tc_filename = get_tc_filename(tc) + try_rm(tc_filename) + try_rm(tc_filename + '.part') + try_rm(tc_filename + '.info.json') + try_rm_tcs_files() try: for retry in range(1, RETRIES + 1): try: @@ -98,14 +107,15 @@ def generator(test_case): break for tc in test_cases: + tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): - self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) - self.assertTrue(tc['file'] in finished_hook_called) - self.assertTrue(os.path.exists(tc['file'] + '.info.json')) + self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) + self.assertTrue(tc_filename in finished_hook_called) + self.assertTrue(os.path.exists(tc_filename + '.info.json')) if 'md5' in tc: - md5_for_file = _file_md5(tc['file']) + md5_for_file = _file_md5(tc_filename) self.assertEqual(md5_for_file, tc['md5']) - with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: + with io.open(tc_filename + '.info.json', encoding='utf-8') as infof: info_dict = json.load(infof) for (info_field, expected) in tc.get('info_dict', {}).items(): if isinstance(expected, compat_str) and expected.startswith('md5:'): @@ -126,10 +136,7 @@ def generator(test_case): for key in ('id', 'url', 'title', 'ext'): self.assertTrue(key in info_dict.keys() and info_dict[key]) finally: - for tc in test_cases: - try_rm(tc['file']) - try_rm(tc['file'] + '.part') - try_rm(tc['file'] + '.info.json') + try_rm_tcs_files() return test_template diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 313295839..060678e9b 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -272,7 +272,7 @@ class YoutubeDL(object): autonumber_size = 5 
autonumber_templ = u'%0' + str(autonumber_size) + u'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads - if template_dict['playlist_index'] is not None: + if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] sanitize = lambda k, v: sanitize_filename( From 2563bcc85cc09382d7e731709b2c8a4ad96c7ea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 22:02:17 +0100 Subject: [PATCH 225/264] Add an extractor for MySpace (closes #1666) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/myspace.py | 48 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/myspace.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..caaf54456 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .mit import TechTVMITIE, MITIE from .mixcloud import MixcloudIE from .mtv import MTVIE from .muzu import MuzuTVIE +from .myspace import MySpaceIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .naver import NaverIE diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py new file mode 100644 index 000000000..050f54a5a --- /dev/null +++ b/youtube_dl/extractor/myspace.py @@ -0,0 +1,48 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, +) + + +class MySpaceIE(InfoExtractor): + _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P\d+)' + + _TEST = { + u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689', + u'info_dict': { + u'id': u'100008689', + u'ext': u'flv', + u'title': u'Viva La Vida', + u'description': u'The official Viva La Vida video, directed by Hype Williams', + u'uploader': u'Coldplay', + u'uploader_id': u'coldplay', + }, + u'params': { + # rtmp download + 
u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + context = json.loads(self._search_regex(r'context = ({.*?});', webpage, + u'context')) + video = context['video'] + rtmp_url, play_path = video['streamUrl'].split(';', 1) + + return { + 'id': compat_str(video['mediaId']), + 'title': video['title'], + 'url': rtmp_url, + 'play_path': play_path, + 'ext': 'flv', + 'description': video['description'], + 'thumbnail': video['imageUrl'], + 'uploader': video['artistName'], + 'uploader_id': video['artistUsername'], + } From dd508b7c4f0dd8881de07a4e8593d4fcdef9bae7 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 28 Oct 2013 18:03:26 -0400 Subject: [PATCH 226/264] [tests] don't fail on network errors This is suboptimal, but at least this way we will need to look at the logs only to check for network errors that happen too often, instead of parsing a ton of lines each time to see if there is some true test failing --- test/helper.py | 17 +++++++++++++++++ test/test_download.py | 22 +++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/test/helper.py b/test/helper.py index 777119ea5..d7bf7a828 100644 --- a/test/helper.py +++ b/test/helper.py @@ -5,9 +5,11 @@ import json import os.path import re import types +import sys import youtube_dl.extractor from youtube_dl import YoutubeDL +from youtube_dl.utils import preferredencoding def global_setup(): @@ -33,6 +35,21 @@ def try_rm(filename): raise +def report_warning(message): + ''' + Print the message to stderr, it will be prefixed with 'WARNING:' + If stderr is a tty file the 'WARNING:' will be colored + ''' + if sys.stderr.isatty() and os.name != 'nt': + _msg_header = u'\033[0;33mWARNING:\033[0m' + else: + _msg_header = u'WARNING:' + output = u'%s %s\n' % (_msg_header, message) + if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: + 
output = output.encode(preferredencoding()) + sys.stderr.write(output) + + class FakeYDL(YoutubeDL): def __init__(self, override=None): # Different instances of the downloader can't share the same dictionary diff --git a/test/test_download.py b/test/test_download.py index f136176b1..565afa1b5 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -6,7 +6,14 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import get_params, get_testcases, global_setup, try_rm, md5 +from test.helper import ( + get_params, + get_testcases, + global_setup, + try_rm, + md5, + report_warning +) global_setup() @@ -92,17 +99,22 @@ def generator(test_case): try_rm(tc_filename + '.info.json') try_rm_tcs_files() try: - for retry in range(1, RETRIES + 1): + try_num = 1 + while True: try: ydl.download([test_case['url']]) except (DownloadError, ExtractorError) as err: - if retry == RETRIES: raise - # Check if the exception is not a network related one if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): raise - print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) + if try_num == RETRIES: + report_warning(u'Failed due to network errors, skipping...') + return + + print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + + try_num += 1 else: break From 646e17a53d3885b84b03045728b3add3d50f513c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 28 Oct 2013 23:18:13 +0100 Subject: [PATCH 227/264] Fix YouTubeDL test --- test/test_YoutubeDL.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8cd1bdce..ffebb4ae5 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -62,10 +62,10 @@ class TestFormatSelection(unittest.TestCase): def test_format_limit(self): formats = [ - {u'format_id': u'meh'}, - {u'format_id': u'good'}, - {u'format_id': 
u'great'}, - {u'format_id': u'excellent'}, + {u'format_id': u'meh', u'url': u'http://example.com/meh'}, + {u'format_id': u'good', u'url': u'http://example.com/good'}, + {u'format_id': u'great', u'url': u'http://example.com/great'}, + {u'format_id': u'excellent', u'url': u'http://example.com/exc'}, ] info_dict = { u'formats': formats, u'extractor': u'test', 'id': 'testvid'} From 321a01f97110c3048e9d9c360a099d1ec8cd4479 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 23:37:01 +0100 Subject: [PATCH 228/264] [mtv] Remove the templates from the mediagen url --- youtube_dl/extractor/mtv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e520e2bb4..e96d3952c 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -80,6 +80,8 @@ class MTVIE(InfoExtractor): video_id = self._id_from_uri(uri) self.report_extraction(video_id) mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url'] + # Remove the templates, like &device={device} + mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url) if 'acceptMethods' not in mediagen_url: mediagen_url += '&acceptMethods=fms' mediagen_page = self._download_webpage(mediagen_url, video_id, From f6cc16f5d821a50df173b865164e4fa9cbe854af Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 28 Oct 2013 19:07:16 -0400 Subject: [PATCH 229/264] [tests] a HTTP 503 is a transient issue --- test/test_download.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_download.py b/test/test_download.py index 565afa1b5..dfb04d010 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -26,6 +26,7 @@ import youtube_dl.YoutubeDL from youtube_dl.utils import ( compat_str, compat_urllib_error, + compat_HTTPError, DownloadError, ExtractorError, UnavailableVideoError, @@ -105,7 +106,7 @@ def generator(test_case): 
ydl.download([test_case['url']]) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one - if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): raise if try_num == RETRIES: From 795f28f871074aca2a74dfe67e1e75252b525c4c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 06:45:54 +0100 Subject: [PATCH 230/264] [youtube] Fix login (Fixes #1681) --- youtube_dl/extractor/youtube.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d05d0a8c1..f3a2a32b4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) return False - galx = None - dsh = None - match = re.search(re.compile(r' Date: Tue, 29 Oct 2013 06:48:39 +0100 Subject: [PATCH 231/264] release 2013.10.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 048afc8e7..1a94003bc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.28' +__version__ = '2013.10.29' From 912cbf5d4ef5b131af88e63815863c389083d077 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 14:00:01 +0100 Subject: [PATCH 232/264] [vevo] Fix timestamp handling ( / 1000 is implicit float division ) --- youtube_dl/extractor/vevo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 1c1cc418d..26ec9fa1b 100644 --- a/youtube_dl/extractor/vevo.py +++ 
b/youtube_dl/extractor/vevo.py @@ -58,9 +58,9 @@ class VevoIE(InfoExtractor): 'width': int(attr['frameWidth']), }) - date_epoch = int(self._search_regex( - r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000 - upload_date = datetime.datetime.fromtimestamp(date_epoch) + timestamp_ms = int(self._search_regex( + r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) + upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) info = { 'id': video_id, 'title': video_info['title'], From 57dd9a8f2f5885fb3d909c4905adb69b4749491c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 15:09:45 +0100 Subject: [PATCH 233/264] Nicer --list-formats output --- youtube_dl/YoutubeDL.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 060678e9b..260cd2809 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -759,6 +759,8 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): + if format.get('_resolution') is not None: + return format['_resolution'] if format.get('height') is not None: if format.get('width') is not None: res = u'%sx%s' % (format['width'], format['height']) @@ -769,19 +771,22 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - formats_s = [] - for format in info_dict.get('formats', [info_dict]): - formats_s.append(u'%-15s%-7s %-15s%s' % ( + def line(format): + return (u'%-15s%-10s%-12s%s' % ( format['format_id'], format['ext'], - format.get('format_note', ''), self.format_resolution(format), + format.get('format_note', ''), ) ) + + formats_s = list(map(line, info_dict.get('formats', [info_dict]))) if len(formats_s) != 1: - formats_s[0] += ' (worst)' - formats_s[-1] += ' (best)' - formats_s = "\n".join(formats_s) - self.to_screen(u'[info] Available formats for %s:\n' - u'format code extension note resolution\n%s' % ( - info_dict['id'], 
formats_s)) + formats_s[0] += (' ' if formats_s[0] else '') + '(worst)' + formats_s[-1] += (' ' if formats_s[-1] else '') + '(best)' + + header_line = line({ + 'format_id': u'format code', 'ext': u'extension', + '_resolution': u'resolution', 'format_note': u'note'}) + self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, u"\n".join(formats_s))) From e54fd4b23b8110779e8caff805d3078dcf042d0b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 15:10:09 +0100 Subject: [PATCH 234/264] [vevo] Add more format details --- youtube_dl/extractor/vevo.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 26ec9fa1b..4d9f2a843 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -50,10 +50,11 @@ class VevoIE(InfoExtractor): # Already sorted from worst to best quality for rend in renditions.findall('rendition'): attr = rend.attrib - f_url = attr['url'] + format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr formats.append({ - 'url': f_url, - 'ext': determine_ext(f_url), + 'url': attr['url'], + 'format_id': attr['name'], + 'format_note': format_note, 'height': int(attr['frameheight']), 'width': int(attr['frameWidth']), }) @@ -71,7 +72,4 @@ class VevoIE(InfoExtractor): 'duration': video_info['duration'], } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info From 21c924f4068692786e0c5435689d10f3d17ef612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 29 Oct 2013 20:58:49 +0100 Subject: [PATCH 235/264] [arte] Download the 'Originalversion' version if it's the only one available (fixes #1682) --- youtube_dl/extractor/arte.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 
d39b48951..e10c74c11 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -158,7 +158,9 @@ class ArteTVPlus7IE(InfoExtractor): 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - formats = player_info['VSR'].values() + all_formats = player_info['VSR'].values() + # Some formats use the m3u8 protocol + all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) def _match_lang(f): if f.get('versionCode') is None: return True @@ -170,11 +172,16 @@ class ArteTVPlus7IE(InfoExtractor): regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url - formats = filter(_match_lang, formats) - # Some formats use the m3u8 protocol - formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) - # We order the formats by quality + formats = filter(_match_lang, all_formats) formats = list(formats) # in python3 filter returns an iterator + if not formats: + # Some videos are only available in the 'Originalversion' + # they aren't tagged as being in French or German + if all(f['versionCode'] == 'VO' for f in all_formats): + formats = all_formats + else: + raise ExtractorError(u'The formats list is empty') + # We order the formats by quality if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) else: From b9a836515fad5df57a86412b2cd41c49869ec0d6 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Tue, 29 Oct 2013 16:44:35 -0400 Subject: [PATCH 236/264] Update the Vimeo test vector md5 confirmed that this is indeed the first 10241 (we went off by one with byte range 0-10240) of the full, playing mp4, so they probably reencoded or something --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 
b4dbcd2ee..c7d864a2b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -27,7 +27,7 @@ class VimeoIE(InfoExtractor): { u'url': u'http://vimeo.com/56015672#at=0', u'file': u'56015672.mp4', - u'md5': u'ae7a1d8b183758a0506b0622f37dfa14', + u'md5': u'8879b6cc097e987f02484baf890129e5', u'info_dict': { u"upload_date": u"20121220", u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", From 94badb2599e54bfd711b38f3a74c552ff652d6d3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:09:26 +0100 Subject: [PATCH 237/264] Fix output indenting for --list-formats --- youtube_dl/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 260cd2809..898533496 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -780,10 +780,11 @@ class YoutubeDL(object): ) ) - formats_s = list(map(line, info_dict.get('formats', [info_dict]))) - if len(formats_s) != 1: - formats_s[0] += (' ' if formats_s[0] else '') + '(worst)' - formats_s[-1] += (' ' if formats_s[-1] else '') + '(best)' + formats = info_dict.get('formats', [info_dict]) + formats_s = list(map(line, formats)) + if len(formats) > 1: + formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' + formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' header_line = line({ 'format_id': u'format code', 'ext': u'extension', From b5d0d817bc8a23ef6dc2a00d1af6fad893143206 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:09:44 +0100 Subject: [PATCH 238/264] Remove superfluous space --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ce349fe20..cef4dce85 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -63,7 +63,7 @@ class InfoExtractor(object): * ext Will be calculated from url if missing * format A human-readable description of the format ("mp4 container with h264/opus"). - Calculated from the format_id, width, height + Calculated from the format_id, width, height. and format_note fields if missing. * format_id A short description of the format ("mp4_h264_opus" or "19") From 72321ead7b176824d1a8b2895ad4926555e41b88 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:14:17 +0100 Subject: [PATCH 239/264] [vevo] Readd support for SMIL (Fixes #1683) --- youtube_dl/extractor/vevo.py | 80 +++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 4d9f2a843..3f6020f74 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -5,7 +5,7 @@ import datetime from .common import InfoExtractor from ..utils import ( - determine_ext, + compat_HTTPError, ExtractorError, ) @@ -16,26 +16,22 @@ class VevoIE(InfoExtractor): (currently used by MTVIE) """ _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*?)(\?|$)' - _TEST = { + _TESTS = [{ u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'file': u'GB1101300280.mp4', + u"md5": u"06bea460acb744eab74a9d7dcb4bfd61", u'info_dict': { u"upload_date": u"20130624", u"uploader": u"Hurts", u"title": u"Somebody to Die For", - u'duration': 230, + u"duration": 230, + u"width": 1920, + u"height": 1080, } - } + }] + _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - - self.report_extraction(video_id) - 
video_info = json.loads(info_json)['video'] + def _formats_from_json(self, video_info): last_version = {'version': -1} for version in video_info['videoVersions']: # These are the HTTP downloads, other types are for different manifests @@ -50,7 +46,7 @@ class VevoIE(InfoExtractor): # Already sorted from worst to best quality for rend in renditions.findall('rendition'): attr = rend.attrib - format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr + format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr formats.append({ 'url': attr['url'], 'format_id': attr['name'], @@ -58,6 +54,62 @@ class VevoIE(InfoExtractor): 'height': int(attr['frameheight']), 'width': int(attr['frameWidth']), }) + return formats + + def _formats_from_smil(self, smil_xml): + formats = [] + smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) + els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') + for el in els: + src = el.attrib['src'] + m = re.match(r'''(?xi) + (?P[a-z0-9]+): + (?P + [/a-z0-9]+ # The directory and main part of the URL + _(?P[0-9]+)k + _(?P[0-9]+)x(?P[0-9]+) + _(?P[a-z0-9]+) + _(?P[0-9]+) + _(?P[a-z0-9]+) + _(?P[0-9]+) + \.[a-z0-9]+ # File extension + )''', src) + if not m: + continue + + format_url = self._SMIL_BASE_URL + m.group('path') + format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' % + m.groupdict()) + formats.append({ + 'url': format_url, + 'format_id': u'SMIL_' + m.group('cbr'), + 'format_note': format_note, + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + return formats + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + info_json = self._download_webpage(json_url, video_id, u'Downloading json info') + video_info = json.loads(info_json)['video'] + + 
formats = self._formats_from_json(video_info) + try: + smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( + self._SMIL_BASE_URL, video_id, video_id.lower()) + smil_xml = self._download_webpage(smil_url, video_id, + u'Downloading SMIL info') + formats.extend(self._formats_from_smil(smil_xml)) + except ExtractorError as ee: + if not isinstance(ee.cause, compat_HTTPError): + raise + self._downloader.report_warning( + u'Cannot download SMIL information, falling back to JSON ..') timestamp_ms = int(self._search_regex( r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) From 7193498811cb17a66ca57569a8588adb28ba2b27 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:17:00 +0100 Subject: [PATCH 240/264] Use index in formt string (Fixes vevo test on Python 2.6) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 898533496..7f73ea360 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -482,7 +482,7 @@ class YoutubeDL(object): format['format'] = u'{id} - {res}{note}'.format( id=format['format_id'], res=self.format_resolution(format), - note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', + note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing if 'ext' not in format: From 33b1d9595d853893b5d732863dc2f5eabd939637 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:17:20 +0100 Subject: [PATCH 241/264] release 2013.10.30 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1a94003bc..e8eade7ad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.29' +__version__ = '2013.10.30' From 9f1109a56424d118263963062bc5185d8415835e Mon Sep 17 
00:00:00 2001 From: Philipp Hagemeister Date: Thu, 31 Oct 2013 00:20:49 +0100 Subject: [PATCH 242/264] [dailymotion] Fix support for age-restricted videos (Fixes #1688) --- youtube_dl/extractor/dailymotion.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4c0488245..355b4ed0a 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): """Build a request with the family filter disabled""" request = compat_urllib_request.Request(url) request.add_header('Cookie', 'family_filter=off') + request.add_header('Cookie', 'ff=off') return request class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): @@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): }, u'skip': u'VEVO is only available in some countries', }, + # age-restricted video + { + u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', + u'file': u'xyh2zz.mp4', + u'md5': u'0d667a7b9cebecc3c89ee93099c4159d', + u'info_dict': { + u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', + u'uploader': 'HotWaves1012', + u'age_limit': 18, + } + + } ] def _real_extract(self, url): @@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): video_uploader = self._search_regex([r'(?im)[^<]+?]+?>([^<]+?)', # Looking for official user r'<(?:span|a) .*?rel="author".*?>([^<]+?)([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage) @@ -132,15 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): self._list_available_subtitles(video_id) return - return [{ + return { 'id': video_id, 'formats': formats, 'uploader': video_uploader, 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), 'subtitles': video_subtitles, - 'thumbnail': info['thumbnail_url'] - }] + 'thumbnail': info['thumbnail_url'], + 'age_limit': age_limit, + } def _get_available_subtitles(self, video_id): try: From 0ef7ad5cd49d527a24c62e831cf80f2eb443276f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 31 Oct 2013 07:55:03 +0100 Subject: [PATCH 243/264] Fix the test for dailymotion subtitles The extractor returns a single info_dict now. --- test/test_dailymotion_subtitles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index c596415c4..ba3580ea4 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -22,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase): return info_dict def getSubtitles(self): info_dict = self.getInfoDict() - return info_dict[0]['subtitles'] + return info_dict['subtitles'] def test_no_writesubtitles(self): subtitles = self.getSubtitles() self.assertEqual(subtitles, None) From 5f1ea943ab6814c2f8ca2a383f990e3f4c9e5f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 31 Oct 2013 08:07:26 +0100 Subject: [PATCH 244/264] [livestream] fix the extraction of events It now uses a json dictionary from the webpage. 
--- youtube_dl/extractor/livestream.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index d04da98c8..4531fd6ab 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor): if video_id is None: # This is an event page: - player = get_meta_content('twitter:player', webpage) - if player is None: - raise ExtractorError('Couldn\'t extract event api url') - api_url = player.replace('/player', '') - api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url) - info = json.loads(self._download_webpage(api_url, event_name, - u'Downloading event info')) + config_json = self._search_regex(r'window.config = ({.*?});', + webpage, u'window config') + info = json.loads(config_json)['event'] videos = [self._extract_video_info(video_data['data']) for video_data in info['feed']['data'] if video_data['type'] == u'video'] return self.playlist_result(videos, info['id'], info['full_name']) From ab4e15134719e6c01a3a9768f21a0f361e4b781d Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 1 Nov 2013 01:24:23 +0100 Subject: [PATCH 245/264] [CinemassacreIE] Support more embed urls --- youtube_dl/extractor/cinemassacre.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 2fe1033f0..8f9396d6b 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor): webpage_url = u'http://' + mobj.group('url') webpage = self._download_webpage(webpage_url, None) # Don't know video id yet video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') - mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) + mobj = 
re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) if not mobj: raise ExtractorError(u'Can\'t extract embed url and video id') playerdata_url = mobj.group(u'embed_url') From 66cf3ac3426b62fb960b4de770c4ea8203a0e205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Nov 2013 11:55:35 +0100 Subject: [PATCH 246/264] [metacafe] Fix support for age-restricted videos (fixes #1696) The 'Content-Type' header must be set for disabling the family filter. The 'flashversion' cookie is only needed for AnyClip videos. Added tests for standard metacafe videos and for age-restricted videos. Also set the 'age_limit' field. --- youtube_dl/extractor/metacafe.py | 51 ++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 234b9e80f..91480ba87 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor): _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' - _TESTS = [{ + _TESTS = [ + # Youtube video + { u"add_ie": ["Youtube"], u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", u"file": u"_aUehQsCQtM.mp4", @@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor): u"uploader_id": u"PBS" } }, + # Normal metacafe video + { + u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/', + u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad', + u'info_dict': { + u'id': u'11121940', + u'ext': u'mp4', + u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4', + u'uploader': u'ign', + u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.', + }, + 
}, + # AnyClip video { u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/", u"file": u"an-dVVXnuY7Jh77J.mp4", u"info_dict": { u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3", u"uploader": u"anyclip", - u"description": u"md5:38c711dd98f5bb87acf973d573442e67" - } - }] + u"description": u"md5:38c711dd98f5bb87acf973d573442e67", + }, + }, + # age-restricted video + { + u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/', + u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09', + u'info_dict': { + u'id': u'5186653', + u'ext': u'mp4', + u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.', + u'uploader': u'Dwayne Pipe', + u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b', + u'age_limit': 18, + }, + }, + ] def report_disclaimer(self): @@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor): 'submit': "Continue - I'm over 18", } request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') try: self.report_age_confirmation() compat_urllib_request.urlopen(request).read() @@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor): # Retrieve video webpage to extract further information req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) - req.headers['Cookie'] = 'flashVersion=0;' + + # AnyClip videos require the flashversion cookie so that we get the link + # to the mp4 file + mobj_an = re.match(r'^an-(.*?)$', video_id) + if mobj_an: + req.headers['Cookie'] = 'flashVersion=0;' webpage = self._download_webpage(req, video_id) # Extract URL, uploader and title from webpage @@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor): r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', webpage, u'uploader nickname', fatal=False) + if 
re.search(r'"contentRating":"restricted"', webpage) is not None: + age_limit = 18 + else: + age_limit = 0 + return { '_type': 'video', 'id': video_id, @@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor): 'upload_date': None, 'title': video_title, 'ext': video_ext, + 'age_limit': age_limit, } From 60d142aa8d896674ca2b062a53b3d18c644192ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Nov 2013 22:28:51 +0100 Subject: [PATCH 247/264] Add an extractor for vk.com (closes #1635) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vk.py | 45 ++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 youtube_dl/extractor/vk.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index caaf54456..bcf1cce7f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -142,6 +142,7 @@ from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE +from .vk import VKIE from .wat import WatIE from .websurg import WeBSurgIE from .weibo import WeiboIE diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py new file mode 100644 index 000000000..90d8a6d07 --- /dev/null +++ b/youtube_dl/extractor/vk.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + unescapeHTML, +) + + +class VKIE(InfoExtractor): + IE_NAME = u'vk.com' + _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$)' + + _TEST = { + u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', + u'md5': u'0deae91935c54e00003c2a00646315f0', + u'info_dict': { + u'id': u'162222515', + u'ext': u'flv', + u'title': u'ProtivoGunz - Хуёвая песня', + u'uploader': u'Noize MC', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = 
mobj.group('id') + info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id + info_page = self._download_webpage(info_url, video_id) + m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) + if m_yt is not None: + self.to_screen(u'Youtube video detected') + return self.url_result(m_yt.group(1), 'Youtube') + vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars') + vars = json.loads(vars_json) + + return { + 'id': compat_str(vars['vid']), + 'url': vars['url240'], + 'title': unescapeHTML(vars['md_title']), + 'thumbnail': vars['jpg'], + 'uploader': vars['md_author'], + } From 8eddf3e91ddab3bb766bc5176edb3120be5743ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Nov 2013 11:21:05 +0100 Subject: [PATCH 248/264] [youtube] Encode subtitle track name in request (Fixes #1700) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3a2a32b4..dc601de52 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1111,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'lang': lang, 'v': video_id, 'fmt': self._downloader.params.get('subtitlesformat'), - 'name': l[0], + 'name': l[0].encode('utf-8'), }) url = u'http://www.youtube.com/api/timedtext?' 
+ params sub_lang_list[lang] = url From aa2484e390d8a5e74d740fda61b4062a4a8c1d0e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Nov 2013 11:21:36 +0100 Subject: [PATCH 249/264] release 2013.11.02 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e8eade7ad..75a46a2d5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.30' +__version__ = '2013.11.02' From 72a5b4f70216fe1a5b1c22be34653ae0ff81058a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Nov 2013 19:01:01 +0100 Subject: [PATCH 250/264] Add an extractor for bambuser.com (#1702) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/bambuser.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/bambuser.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bcf1cce7f..a1e35eb46 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -9,6 +9,7 @@ from .arte import ( ArteTVFutureIE, ) from .auengine import AUEngineIE +from .bambuser import BambuserIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE from .bloomberg import BloombergIE diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py new file mode 100644 index 000000000..cf8da22e3 --- /dev/null +++ b/youtube_dl/extractor/bambuser.py @@ -0,0 +1,42 @@ +import re +import json + +from .common import InfoExtractor + + +class BambuserIE(InfoExtractor): + _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' + _API_KEY = '005f64509e19a868399060af746a00aa' + + _TEST = { + u'url': u'http://bambuser.com/v/4050584', + u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', + u'info_dict': { + u'id': u'4050584', + u'ext': u'flv', + u'title': u'Education engineering days - lightning talks', + 
u'duration': 3741, + u'uploader': u'pixelversity', + u'uploader_id': u'344706', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = ('http://player-c.api.bambuser.com/getVideo.json?' + '&api_key=%s&vid=%s' % (self._API_KEY, video_id)) + info_json = self._download_webpage(info_url, video_id) + info = json.loads(info_json)['result'] + + return { + 'id': video_id, + 'title': info['title'], + 'url': info['url'], + 'thumbnail': info['preview'], + 'duration': int(info['length']), + 'view_count': int(info['views_total']), + 'uploader': info['username'], + 'uploader_id': info['uid'], + } + From 165e3bb67a6d737f33d0aa2024c652b363d85ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Nov 2013 19:50:57 +0100 Subject: [PATCH 251/264] [bambuser] Add an extractor for channels (closes #1702) --- test/test_playlists.py | 9 +++++++ youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/bambuser.py | 40 +++++++++++++++++++++++++++++++- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index d6a8d56df..de1e8d88e 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -20,6 +20,7 @@ from youtube_dl.extractor import ( SoundcloudUserIE, LivestreamIE, NHLVideocenterIE, + BambuserChannelIE, ) @@ -85,5 +86,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], u'Highlights') self.assertEqual(len(result['entries']), 12) + def test_bambuser_channel(self): + dl = FakeYDL() + ie = BambuserChannelIE(dl) + result = ie.extract('http://bambuser.com/channel/pixelversity') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'pixelversity') + self.assertTrue(len(result['entries']) >= 66) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a1e35eb46..a69c08f51 100644 --- 
a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -9,7 +9,7 @@ from .arte import ( ArteTVFutureIE, ) from .auengine import AUEngineIE -from .bambuser import BambuserIE +from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE from .bloomberg import BloombergIE diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index cf8da22e3..f3b36f473 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -1,10 +1,15 @@ import re import json +import itertools from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, +) class BambuserIE(InfoExtractor): + IE_NAME = u'bambuser' _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' _API_KEY = '005f64509e19a868399060af746a00aa' @@ -33,10 +38,43 @@ class BambuserIE(InfoExtractor): 'id': video_id, 'title': info['title'], 'url': info['url'], - 'thumbnail': info['preview'], + 'thumbnail': info.get('preview'), 'duration': int(info['length']), 'view_count': int(info['views_total']), 'uploader': info['username'], 'uploader_id': info['uid'], } + +class BambuserChannelIE(InfoExtractor): + IE_NAME = u'bambuser:channel' + _VALID_URL = r'http://bambuser.com/channel/(?P.*?)(?:/|#|\?|$)' + # The maximum number we can get with each request + _STEP = 50 + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + urls = [] + last_id = '' + for i in itertools.count(1): + req_url = ('http://bambuser.com/xhr-api/index.php?username={user}' + '&sort=created&access_mode=0%2C1%2C2&limit={count}' + '&method=broadcast&format=json&vid_older_than={last}' + ).format(user=user, count=self._STEP, last=last_id) + req = compat_urllib_request.Request(req_url) + # Without setting this header, we wouldn't get any result + req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) + info_json = self._download_webpage(req, user, + u'Downloading 
page %d' % i) + results = json.loads(info_json)['result'] + if len(results) == 0: + break + last_id = results[-1]['vid'] + urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) + + return { + '_type': 'playlist', + 'title': user, + 'entries': urls, + } From cf519235455f312ac45e1d9829018eb5ecbec628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Nov 2013 20:46:26 +0100 Subject: [PATCH 252/264] [youtube] Remove vevo test The video is no longer available and it seems that vevo video don't use encrypted signatures anymore. --- youtube_dl/extractor/youtube.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dc601de52..a19abe1f0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -339,18 +339,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." } }, - { - u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U", - u"file": u"1ltcDfZMA3U.mp4", - u"note": u"Test VEVO video (#897)", - u"info_dict": { - u"upload_date": u"20070518", - u"title": u"Maps - It Will Find You", - u"description": u"Music video by Maps performing It Will Find You.", - u"uploader": u"MuteUSA", - u"uploader_id": u"MuteUSA" - } - }, { u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY", u"file": u"UxxajLWwzqY.mp4", From 98d7efb537975b29ccaea64ff2765a0ec7bdb07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Nov 2013 20:51:09 +0100 Subject: [PATCH 253/264] [exfm] skip tests The site is down too often. 
--- youtube_dl/extractor/exfm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index c74556579..a51d79b08 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -21,6 +21,7 @@ class ExfmIE(InfoExtractor): u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', }, u'note': u'Soundcloud song', + u'skip': u'The site is down too often', }, { u'url': u'http://ex.fm/song/wddt8', @@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor): u'title': u'Safe and Sound', u'uploader': u'Capital Cities', }, + u'skip': u'The site is down too often', }, ] From f52f01b5d2ed117070475b0c7593a55d417e8e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Nov 2013 21:20:46 +0100 Subject: [PATCH 254/264] [brightcove] Don't set the extension If the video only has the 'FLVFullLengthURL' key, it can still be an mp4 file. --- youtube_dl/extractor/brightcove.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 1392f382a..0d9b87a34 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -23,7 +23,7 @@ class BrightcoveIE(InfoExtractor): # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', u'file': u'2371591881001.mp4', - u'md5': u'9e80619e0a94663f0bdc849b4566af19', + u'md5': u'8eccab865181d29ec2958f32a6a754f5', u'note': u'Test Brightcove downloads and detection in GenericIE', u'info_dict': { u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', @@ -122,12 +122,10 @@ class BrightcoveIE(InfoExtractor): best_format = renditions[-1] info.update({ 'url': best_format['defaultURL'], - 'ext': 
'mp4', }) elif video_info.get('FLVFullLengthURL') is not None: info.update({ 'url': video_info['FLVFullLengthURL'], - 'ext': 'flv', }) else: raise ExtractorError(u'Unable to extract video url for %s' % info['id']) From 86ad94bb2ecdbc781f36f0e5fb49c91008e68cc8 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 2 Nov 2013 22:33:49 +0100 Subject: [PATCH 255/264] [ExtremeTubeIE] Set age_limit to 18 and fix uploader extraction --- youtube_dl/extractor/extremetube.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 981de430d..0f1eec40f 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -31,15 +31,13 @@ class ExtremeTubeIE(InfoExtractor): webpage = self._download_webpage(req, video_id) video_title = self._html_search_regex(r'

]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') - uploader = self._html_search_regex(r'>Posted by:(?=<)(\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) + uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) path = compat_urllib_parse_urlparse( video_url ).path extension = os.path.splitext( path )[1][1:] format = path.split('/')[5].split('_')[:2] format = "-".join( format ) - age_limit = self._rta_search(webpage) - return { 'id': video_id, 'title': video_title, @@ -48,5 +46,5 @@ class ExtremeTubeIE(InfoExtractor): 'ext': extension, 'format': format, 'format_id': format, - 'age_limit': age_limit, + 'age_limit': 18, } From 137bbb3e37a41bd49f7c946ae18fb2cd0d1ba144 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 2 Nov 2013 22:45:48 +0100 Subject: [PATCH 256/264] [XTubeIE] Add description to TEST --- youtube_dl/extractor/xtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 7d06a7021..483fb0791 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -16,6 +16,7 @@ class XTubeIE(InfoExtractor): u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab', u'info_dict': { u"title": u"strange erotica", + u"description": u"surreal gay themed erotica...almost an ET kind of thing", u"uploader": u"greenshowers", u"age_limit": 18, } From a3dd924871a6fa01d84cadf0a6f60ef622189f09 Mon Sep 17 00:00:00 2001 From: Craig Markwardt Date: Sat, 2 Nov 2013 22:40:48 -0400 Subject: [PATCH 257/264] Add YoutubeSearchDateIE extractor to youtube.py & __init__.py, which searches by publication date. 
--- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/youtube.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bcf1cce7f..abdee8eb0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -159,6 +159,7 @@ from .youtube import ( YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, + YoutubeSearchDateIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dc601de52..6b5ce068d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1743,6 +1743,9 @@ class YoutubeSearchIE(SearchInfoExtractor): videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] return self.playlist_result(videos, query) +class YoutubeSearchDateIE(YoutubeSearchIE): + _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' + _SEARCH_KEY = 'ytsearchdate' class YoutubeShowIE(InfoExtractor): IE_DESC = u'YouTube.com (multi-season) shows' From b6c45014aed4b3176be1142958be98d7cb9dbaff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 3 Nov 2013 11:56:45 +0100 Subject: [PATCH 258/264] Set the extra_info inside YoutubeDL.process_ie_result and set only if the keys are missing --- test/test_YoutubeDL.py | 12 ++++++++++++ youtube_dl/YoutubeDL.py | 26 +++++++++++++------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index ffebb4ae5..58cf9c313 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -128,6 +128,18 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], u'35') + def test_add_extra_info(self): + test_dict = { + 'extractor': 'Foo', + } + extra_info = { + 
'extractor': 'Bar', + 'playlist': 'funny videos', + } + YDL.add_extra_info(test_dict, extra_info) + self.assertEqual(test_dict['extractor'], 'Foo') + self.assertEqual(test_dict['playlist'], 'funny videos') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 7f73ea360..a3e0a700f 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -318,6 +318,12 @@ class YoutubeDL(object): % info_dict) return None + @staticmethod + def add_extra_info(info_dict, extra_info): + '''Set the keys from extra_info in info dict if they are missing''' + for key, value in extra_info.items(): + info_dict.setdefault(key, value) + def extract_info(self, url, download=True, ie_key=None, extra_info={}): ''' Returns a list with a dictionary for each video we find. @@ -344,17 +350,13 @@ class YoutubeDL(object): break if isinstance(ie_result, list): # Backwards compatibility: old IE result format - for result in ie_result: - result.update(extra_info) ie_result = { '_type': 'compat_list', 'entries': ie_result, } - else: - ie_result.update(extra_info) if 'extractor' not in ie_result: ie_result['extractor'] = ie.IE_NAME - return self.process_ie_result(ie_result, download=download) + return self.process_ie_result(ie_result, download, extra_info) except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break @@ -378,7 +380,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': - ie_result.update(extra_info) + self.add_extra_info(ie_result, extra_info) return self.process_video_result(ie_result) elif result_type == 'url': # We have to add extra_info to the results because it may be @@ -388,6 +390,7 @@ class YoutubeDL(object): ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'playlist': + self.add_extra_info(ie_result, 
extra_info) # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -413,12 +416,8 @@ class YoutubeDL(object): extra = { 'playlist': playlist, 'playlist_index': i + playliststart, + 'extractor': ie_result['extractor'], } - if not 'extractor' in entry: - # We set the extractor, if it's an url it will be set then to - # the new extractor, but if it's already a video we must make - # sure it's present: see issue #877 - entry['extractor'] = ie_result['extractor'] entry_result = self.process_ie_result(entry, download=download, extra_info=extra) @@ -427,10 +426,11 @@ class YoutubeDL(object): return ie_result elif result_type == 'compat_list': def _fixup(r): - r.setdefault('extractor', ie_result['extractor']) + self.add_extra_info(r, + {'extractor': ie_result['extractor']}) return r ie_result['entries'] = [ - self.process_ie_result(_fixup(r), download=download) + self.process_ie_result(_fixup(r), download, extra_info) for r in ie_result['entries'] ] return ie_result From 9103bbc5cd11957de2e906e4401dcf4df9511d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 3 Nov 2013 12:11:13 +0100 Subject: [PATCH 259/264] Add the 'webpage_url' field to info_dict The url for the video page, it must allow to reproduce the result. It's automatically set by YoutubeDL if it's missing. 
--- test/test_download.py | 3 +++ youtube_dl/YoutubeDL.py | 13 ++++++++++--- youtube_dl/extractor/common.py | 3 +++ youtube_dl/extractor/vimeo.py | 13 ++++++------- youtube_dl/extractor/youtube.py | 3 ++- 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index dfb04d010..d6cc9ec33 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -148,6 +148,9 @@ def generator(test_case): # Check for the presence of mandatory fields for key in ('id', 'url', 'title', 'ext'): self.assertTrue(key in info_dict.keys() and info_dict[key]) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor']: + self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) finally: try_rm_tcs_files() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a3e0a700f..8938a2cd3 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -354,8 +354,11 @@ class YoutubeDL(object): '_type': 'compat_list', 'entries': ie_result, } - if 'extractor' not in ie_result: - ie_result['extractor'] = ie.IE_NAME + self.add_extra_info(ie_result, + { + 'extractor': ie.IE_NAME, + 'webpage_url': url + }) return self.process_ie_result(ie_result, download, extra_info) except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) @@ -417,6 +420,7 @@ class YoutubeDL(object): 'playlist': playlist, 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], } entry_result = self.process_ie_result(entry, download=download, @@ -427,7 +431,10 @@ class YoutubeDL(object): elif result_type == 'compat_list': def _fixup(r): self.add_extra_info(r, - {'extractor': ie_result['extractor']}) + { + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + }) return r ie_result['entries'] = [ self.process_ie_result(_fixup(r), download, extra_info) diff 
--git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cef4dce85..e0ccba533 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -71,6 +71,9 @@ class InfoExtractor(object): ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known + webpage_url: The url to the video webpage, if given to youtube-dl it + should allow to get the same result again. (It will be set + by YoutubeDL if it's missing) Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c7d864a2b..62273fd33 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' + _VALID_URL = r'(?Phttps?://)?(?:(?:www|(?Pplayer))\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ @@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') - if not mobj.group('proto'): - url = 'https://' + url - elif mobj.group('pro'): + if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - elif mobj.group('direct_link'): + else: url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information @@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor): if len(formats) == 0: raise ExtractorError(u'No known codec found') - return [{ + return { 'id': video_id, 'uploader': video_uploader, 'uploader_id': video_uploader_id, @@ -243,7 +241,8 @@ class 
VimeoIE(InfoExtractor): 'thumbnail': video_thumbnail, 'description': video_description, 'formats': formats, - }] + 'webpage_url': url, + } class VimeoChannelIE(InfoExtractor): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a19abe1f0..6ddd6ef06 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'subtitles': video_subtitles, 'duration': video_duration, 'age_limit': 18 if age_gate else 0, - 'annotations': video_annotations + 'annotations': video_annotations, + 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, }) return results From be97abc247d26bc36d1ef8cad5c17fc2a99d9101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 3 Nov 2013 12:14:44 +0100 Subject: [PATCH 260/264] Set the 'extractor_key' field in the info_dict It's the string returned by the class method 'ie_key', which allows to retrieve the extractor with 'get_info_extractor' --- test/test_download.py | 2 +- youtube_dl/YoutubeDL.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index d6cc9ec33..73379beb1 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -149,7 +149,7 @@ def generator(test_case): for key in ('id', 'url', 'title', 'ext'): self.assertTrue(key in info_dict.keys() and info_dict[key]) # Check for mandatory fields that are automatically set by YoutubeDL - for key in ['webpage_url', 'extractor']: + for key in ['webpage_url', 'extractor', 'extractor_key']: self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) finally: try_rm_tcs_files() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8938a2cd3..86a6fd043 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -357,7 +357,8 @@ class YoutubeDL(object): self.add_extra_info(ie_result, { 'extractor': ie.IE_NAME, 
- 'webpage_url': url + 'webpage_url': url, + 'extractor_key': ie.ie_key(), }) return self.process_ie_result(ie_result, download, extra_info) except ExtractorError as de: # An error we somewhat expected @@ -421,6 +422,7 @@ class YoutubeDL(object): 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], + 'extractor_key': ie_result['extractor_key'], } entry_result = self.process_ie_result(entry, download=download, @@ -434,6 +436,7 @@ class YoutubeDL(object): { 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], + 'extractor_key': ie_result['extractor_key'], }) return r ie_result['entries'] = [ From a56f9de156c7cca29dfa45de1dadc66e10a265f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 3 Nov 2013 14:03:17 +0100 Subject: [PATCH 261/264] Style fixes for extractors: remove spaces around (,),{ and } --- youtube_dl/extractor/depositfiles.py | 2 +- youtube_dl/extractor/extremetube.py | 6 +++--- youtube_dl/extractor/hypem.py | 4 ++-- youtube_dl/extractor/keezmovies.py | 6 +++--- youtube_dl/extractor/mofosex.py | 6 +++--- youtube_dl/extractor/pornhub.py | 6 +++--- youtube_dl/extractor/spankwire.py | 6 +++--- youtube_dl/extractor/tube8.py | 6 +++--- youtube_dl/extractor/vimeo.py | 2 +- youtube_dl/extractor/xtube.py | 6 +++--- youtube_dl/extractor/yahoo.py | 2 +- youtube_dl/extractor/youku.py | 6 +++--- youtube_dl/extractor/youporn.py | 8 ++++---- 13 files changed, 33 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/depositfiles.py b/youtube_dl/extractor/depositfiles.py index d43348955..2c9fb5f2e 100644 --- a/youtube_dl/extractor/depositfiles.py +++ b/youtube_dl/extractor/depositfiles.py @@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor): url = 'http://depositfiles.com/en/files/' + file_id # Retrieve file webpage with 'Free download' button pressed - free_download_indication = { 'gateway_result' : '1' } + 
free_download_indication = {'gateway_result' : '1'} request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication)) try: self.report_download_webpage(file_id) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 0f1eec40f..1c20e4364 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -33,10 +33,10 @@ class ExtremeTubeIE(InfoExtractor): video_title = self._html_search_regex(r'

]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[5].split('_')[:2] - format = "-".join( format ) + format = "-".join(format) return { 'id': video_id, diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index ab2b59103..9bd06e7c7 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -30,7 +30,7 @@ class HypemIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) track_id = mobj.group(1) - data = { 'ax': 1, 'ts': time.time() } + data = {'ax': 1, 'ts': time.time()} data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?" 
+ data_encoded request = compat_urllib_request.Request(complete_url) @@ -68,4 +68,4 @@ class HypemIE(InfoExtractor): 'ext': "mp3", 'title': title, 'artist': artist, - }] \ No newline at end of file + }] diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 786924445..29658a7d6 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -43,10 +43,10 @@ class KeezMoviesIE(InfoExtractor): if webpage.find('encrypted=true')!=-1: password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password') video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[4].split('_')[:2] - format = "-".join( format ) + format = "-".join(format) age_limit = self._rta_search(webpage) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index a0c926cd1..b9430b09b 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -31,10 +31,10 @@ class MofosexIE(InfoExtractor): video_title = self._html_search_regex(r'

(.+?)<', webpage, u'title') video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url')) - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[5].split('_')[:2] - format = "-".join( format ) + format = "-".join(format) age_limit = self._rta_search(webpage) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 5e2454f1b..75cf4bb9f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -47,10 +47,10 @@ class PornHubIE(InfoExtractor): formats = [] for video_url in video_urls: - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[5].split('_')[:2] - format = "-".join( format ) + format = "-".join(format) formats.append({ 'url': video_url, 'ext': extension, diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 32df0a7fb..97f9c268a 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -49,10 +49,10 @@ class SpankwireIE(InfoExtractor): formats = [] for video_url in video_urls: - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[4].split('_')[:2] - format = "-".join( format ) + format = "-".join(format) formats.append({ 'url': video_url, 'ext': extension, diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index aea9d9a24..d4b7603c7 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -46,10 +46,10 @@ class 
Tube8IE(InfoExtractor): if webpage.find('"encrypted":true')!=-1: password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password') video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[4].split('_')[:2] - format = "-".join( format ) + format = "-".join(format) return { 'id': video_id, diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 62273fd33..d465bf20b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -203,7 +203,7 @@ class VimeoIE(InfoExtractor): # Vimeo specific: extract video codec and quality information # First consider quality, then codecs, then take everything codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] - files = { 'hd': [], 'sd': [], 'other': []} + files = {'hd': [], 'sd': [], 'other': []} config_files = config["video"].get("files") or config["request"].get("files") for codec_name, codec_extension in codecs: for quality in config_files.get(codec_name, []): diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 483fb0791..03ad88bed 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -35,12 +35,12 @@ class XTubeIE(InfoExtractor): video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False) video_description = self._html_search_regex(r'

([^<]+)', webpage, u'description', default=None) video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/') - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[5].split('_')[:2] format[0] += 'p' format[1] += 'k' - format = "-".join( format ) + format = "-".join(format) return { 'id': video_id, diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 464b498f5..34e6afb20 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -132,7 +132,7 @@ class YahooSearchIE(SearchInfoExtractor): mobj = re.search(r'(?Pscreen\.yahoo\.com/.*?-\d*?\.html)"', r) e = self.url_result('http://' + mobj.group('url'), 'Yahoo') res['entries'].append(e) - if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1 )): + if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)): break return res diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 9d88c17f5..a8fd40c83 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -18,7 +18,7 @@ class YoukuIE(InfoExtractor): u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", u"file": u"XNDgyMDQ2NTQw_part00.flv", u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b", - u"params": { u"test": False }, + u"params": {u"test": False}, u"info_dict": { u"title": u"youtube-dl test video \"'/\\ä↭𝕐" } @@ -37,8 +37,8 @@ class YoukuIE(InfoExtractor): source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890") seed = float(seed) for i in range(len(source)): - seed = (seed * 211 + 30031 ) % 65536 - index = math.floor(seed / 65536 * len(source) ) + seed = (seed * 211 + 30031) % 65536 + index = math.floor(seed / 65536 * len(source)) mixed.append(source[int(index)]) source.remove(source[int(index)]) #return 
''.join(mixed) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index e46a9b4d6..bd0f2cae0 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -81,14 +81,14 @@ class YouPornIE(InfoExtractor): # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0 # A path looks like this: # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4 - video_url = unescapeHTML( link ) - path = compat_urllib_parse_urlparse( video_url ).path - extension = os.path.splitext( path )[1][1:] + video_url = unescapeHTML(link) + path = compat_urllib_parse_urlparse(video_url).path + extension = os.path.splitext(path)[1][1:] format = path.split('/')[4].split('_')[:2] # size = format[0] # bitrate = format[1] - format = "-".join( format ) + format = "-".join(format) # title = u'%s-%s-%s' % (video_title, size, bitrate) formats.append({ From 12ebdd150624adc20a841f5fb174676b123ef826 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 3 Nov 2013 15:49:59 +0100 Subject: [PATCH 262/264] [viddler] Support non-digit IDs (Fixes #1714) --- youtube_dl/extractor/viddler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 12c84a985..826804af3 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -8,7 +8,7 @@ from ..utils import ( class ViddlerIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P[0-9]+)' + _VALID_URL = r'(?Phttps?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P[a-z0-9]+)' _TEST = { u"url": u"http://www.viddler.com/v/43903784", u'file': u'43903784.mp4', From 165e179764e7d276d5e6ed79a8e63b63852cdd3e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 3 Nov 2013 
15:50:36 +0100 Subject: [PATCH 263/264] release 2013.11.03 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 75a46a2d5..cc0f9cb4e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.11.02' +__version__ = '2013.11.03' From 08fb86c49b92b53df8963065ab23fd558b8a90d8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 3 Nov 2013 15:58:52 +0100 Subject: [PATCH 264/264] [youtube] Add description for YoutubeSearchDateIE (#1710) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 14e8f59e6..74a381fe2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1735,6 +1735,7 @@ class YoutubeSearchIE(SearchInfoExtractor): class YoutubeSearchDateIE(YoutubeSearchIE): _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' _SEARCH_KEY = 'ytsearchdate' + IE_DESC = u'YouTube.com searches, newest videos first' class YoutubeShowIE(InfoExtractor): IE_DESC = u'YouTube.com (multi-season) shows'