From 131842bb0b96551c3c9e0cf479f8537d32e5ad0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 26 Jun 2013 00:51:27 -0300 Subject: [PATCH 01/12] setup: Move pseudo-docstring to a proper comment. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A string statement is not a docstring if it doesn't occur right at the top of modules, functions, class definitions etc. This patch fixes it. Signed-off-by: Rogério Brito --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 61435fcb7..9c40154ab 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,9 @@ except ImportError: from distutils.core import setup try: + # This will create an exe that needs Microsoft Visual C++ 2008 + # Redistributable Package import py2exe - """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package""" except ImportError: if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': print("Cannot import py2exe", file=sys.stderr) From d055fe4cb09dc18a368f40f73e77081e7b0127bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 26 Jun 2013 00:53:55 -0300 Subject: [PATCH 02/12] setup: cosmetics: Add/remove some whitespace for readability. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also fixes some long lines. 
Signed-off-by: Rogério Brito --- setup.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 9c40154ab..42cbaf858 100644 --- a/setup.py +++ b/setup.py @@ -27,13 +27,15 @@ py2exe_options = { "dist_dir": '.', "dll_excludes": ['w9xpopen.exe'], } + py2exe_console = [{ "script": "./youtube_dl/__main__.py", "dest_base": "youtube-dl", }] + py2exe_params = { 'console': py2exe_console, - 'options': { "py2exe": py2exe_options }, + 'options': {"py2exe": py2exe_options}, 'zipfile': None } @@ -42,13 +44,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': else: params = { 'scripts': ['bin/youtube-dl'], - 'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo... - ('share/doc/youtube_dl', ['README.txt']), - ('share/man/man1/', ['youtube-dl.1'])] + 'data_files': [ # Installing system-wide would require sudo... + ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('share/doc/youtube_dl', ['README.txt']), + ('share/man/man1/', ['youtube-dl.1']) + ] } # Get the version from youtube_dl/version.py without importing the package -exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) setup( name = 'youtube_dl', From 652e7768934025add22f1beae6a525fecd2a6f0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 26 Jun 2013 00:54:55 -0300 Subject: [PATCH 03/12] setup: PEP-8 fixes. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rogério Brito --- setup.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index 42cbaf858..3b6dc2d40 100644 --- a/setup.py +++ b/setup.py @@ -56,21 +56,22 @@ exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) setup( - name = 'youtube_dl', - version = __version__, - description = 'YouTube video downloader', - long_description = 'Small command-line program to download videos from YouTube.com and other video sites.', - url = 'https://github.com/rg3/youtube-dl', - author = 'Ricardo Garcia', - maintainer = 'Philipp Hagemeister', - maintainer_email = 'phihag@phihag.de', - packages = ['youtube_dl', 'youtube_dl.extractor'], + name='youtube_dl', + version=__version__, + description='YouTube video downloader', + long_description='Small command-line program to download videos from' + ' YouTube.com and other video sites.', + url='https://github.com/rg3/youtube-dl', + author='Ricardo Garcia', + maintainer='Philipp Hagemeister', + maintainer_email='phihag@phihag.de', + packages=['youtube_dl', 'youtube_dl.extractor'], # Provokes warning on most systems (why?!) - #test_suite = 'nose.collector', - #test_requires = ['nosetest'], + # test_suite = 'nose.collector', + # test_requires = ['nosetest'], - classifiers = [ + classifiers=[ "Topic :: Multimedia :: Video", "Development Status :: 5 - Production/Stable", "Environment :: Console", From 2e1b3afeca99278baa0ffa7237ca8628f551e268 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Jul 2013 07:39:54 +0200 Subject: [PATCH 04/12] README.md: Fix markup and some of the text. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (Originally from Rogério Brito ) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ceb85fef1..ee2cbb002 100644 --- a/README.md +++ b/README.md @@ -194,11 +194,11 @@ Examples: ### Can you please put the -b option back? -Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. ### I get HTTP error 402 when trying to download a video. What's this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. 
We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### I have downloaded a video but how can I play it? From deacef651f7ca9e206fdd2b962106b943d8cd381 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Jul 2013 08:35:39 +0200 Subject: [PATCH 05/12] Improve formatting --- youtube_dl/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7abe52255..d4a005f62 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -582,8 +582,10 @@ def _real_main(argv=None): if opts.verbose: ydl.to_screen(u'[debug] youtube-dl version ' + __version__) try: - sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__))) + sp = subprocess.Popen( + ['git', 'rev-parse', '--short', 'HEAD'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__))) out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): From 8dba13f7e82ecb57b8e92e43ce63d51937a50288 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Jul 2013 08:36:20 +0200 Subject: [PATCH 06/12] Squelch git not found exception (#973) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index d4a005f62..db63d0adb 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -591,7 +591,7 @@ def _real_main(argv=None): if re.match('[0-9a-f]+', out): ydl.to_screen(u'[debug] Git HEAD: ' + out) except: - pass + sys.exc_clear() ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) ydl.to_screen(u'[debug] Proxy map: ' + 
str(proxy_handler.proxies)) From 9a82b2389fd9b1d893400892d92006d2f9eb17db Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Jul 2013 08:40:21 +0200 Subject: [PATCH 07/12] Do not show bug report for errors that are to be expected (Closes #973) --- youtube_dl/extractor/youtube.py | 2 +- youtube_dl/utils.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b526e0c53..14a8bd6ea 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -441,7 +441,7 @@ class YoutubeIE(InfoExtractor): break if 'token' not in video_info: if 'reason' in video_info: - raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0]) + raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) else: raise ExtractorError(u'"token" parameter not in video info for unknown reason') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f9e7ce956..9137a4f70 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -470,10 +470,14 @@ def make_HTTPS_handler(opts): class ExtractorError(Exception): """Error during info extraction.""" - def __init__(self, msg, tb=None): - """ tb, if given, is the original traceback (so that it can be printed out). """ + def __init__(self, msg, tb=None, expected=False): + """ tb, if given, is the original traceback (so that it can be printed out). + If expected is set, this is a normal error message and most likely not a bug in youtube-dl. + """ - if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + expected = True + if not expected: msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
super(ExtractorError, self).__init__(msg) From d5a62e4f5fb83bbcd44690e19df80997d27e31f3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Jul 2013 09:14:09 +0200 Subject: [PATCH 08/12] release 2013.07.02 --- README.md | 17 ++++++++++------- youtube_dl/version.py | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ee2cbb002..b246d3c53 100644 --- a/README.md +++ b/README.md @@ -18,19 +18,13 @@ which means you can modify it, redistribute it or use it however you like. --version print program version and exit -U, --update update this program to latest version -i, --ignore-errors continue on download errors - -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) - --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) - (default is 1024) - --no-resize-buffer do not automatically adjust the buffer size. By - default, the buffer size is automatically resized - from an initial value of SIZE. --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access is restricted to one domain --list-extractors List all supported extractors and the URLs they would handle + --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. @@ -50,6 +44,15 @@ which means you can modify it, redistribute it or use it however you like. --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date +## Download Options: + -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 
1024 or 16k) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer size. By + default, the buffer size is automatically resized + from an initial value of SIZE. + ## Filesystem Options: -t, --title use title in file name (default) --id use only video ID in file name diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7bba3a883..bc4ad90be 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.06.34.4' +__version__ = '2013.07.02' From d4da3d6116158fd21797e5a43383970e5e9269ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 2 Jul 2013 10:40:23 +0200 Subject: [PATCH 09/12] BlipTVIE: download the video in the best quality (closes #215) --- youtube_dl/extractor/bliptv.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 37141e6a0..f7af65606 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -27,7 +27,7 @@ class BlipTVIE(InfoExtractor): _TEST = { u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', u'file': u'5779306.m4v', - u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe', + u'md5': u'80baf1ec5c3d2019037c1c707d676b9f', u'info_dict': { u"upload_date": u"20111205", u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", @@ -103,7 +103,12 @@ class BlipTVIE(InfoExtractor): data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') - video_url = data['media']['url'] + if 'additionalMedia' in data: + formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) + best_format = formats[-1] + video_url = best_format['url'] + else: + video_url = data['media']['url'] umobj = re.match(self._URL_EXT, video_url) if umobj is None: raise ValueError('Can not determine filename extension') From 
24a267b5626e23db407681273733c7bb193b9a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 2 Jul 2013 12:38:24 +0200 Subject: [PATCH 10/12] TudouIE: extract all the segments of the video and download the best quality (closes #975) Also simplify a bit the extraction of the id from the url and write directly the title for the test video --- youtube_dl/extractor/tudou.py | 57 ++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 4681a6f79..1405b73f7 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -1,24 +1,34 @@ +# coding: utf-8 + import re +import json from .common import InfoExtractor class TudouIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)' + _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' 
_TEST = { u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', - u'file': u'159447792.f4v', - u'md5': u'ad7c358a01541e926a1e413612c6b10a', + u'file': u'159448201.f4v', + u'md5': u'140a49ed444bd22f93330985d8475fcb', u'info_dict': { - u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526" + u"title": u"卡马乔国足开大脚长传冲吊集锦" } } + def _url_for_id(self, id, quality = None): + info_url = "http://v2.tudou.com/f?id="+str(id) + if quality: + info_url += '&hd' + quality + webpage = self._download_webpage(info_url, id, "Opening the info webpage") + final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url') + return final_url + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(2).replace('.html','') + video_id = mobj.group(2) webpage = self._download_webpage(url, video_id) - video_id = re.search('"k":(.+?),',webpage).group(1) title = re.search(",kw:\"(.+)\"",webpage) if title is None: title = re.search(",kw: \'(.+)\'",webpage) @@ -27,14 +37,27 @@ class TudouIE(InfoExtractor): if thumbnail_url is None: thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) thumbnail_url = thumbnail_url.group(1) - info_url = "http://v2.tudou.com/f?id="+str(video_id) - webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") - final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1) - ext = (final_url.split('?')[0]).split('.')[-1] - return [{ - 'id': video_id, - 'url': final_url, - 'ext': ext, - 'title': title, - 'thumbnail': thumbnail_url, - }] + + segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments') + segments = json.loads(segs_json) + # It looks like the keys are the arguments that have to be passed as + # the hd field in the request url, we pick the higher + quality = sorted(segments.keys())[-1] + parts = segments[quality] + result = [] + len_parts = len(parts) + if len_parts > 1: + self.to_screen(u'%s: found %s parts' % (video_id, len_parts)) + for
part in parts: + part_id = part['k'] + final_url = self._url_for_id(part_id, quality) + ext = (final_url.split('?')[0]).split('.')[-1] + part_info = {'id': part_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + } + result.append(part_info) + + return result From 9826925a207628deef3b5c8511ec29dffdfefda5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 2 Jul 2013 17:34:40 +0200 Subject: [PATCH 11/12] ArteTVIE: extract the video with the correct language Some urls from the French version of the page could download the German version. Also instead of extracting the json url from the webpage, build it to skip the download --- youtube_dl/extractor/arte.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 183274eb7..a030a28bb 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -16,7 +16,7 @@ class ArteTvIE(InfoExtractor): www.arte.tv/guide, the extraction process is different for each one. The videos expire in 7 days, so we can't add tests. """ - _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' + _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P.*?).html' _LIVE_URL = r'index-[0-9]+\.html$' @@ -57,10 +57,11 @@ class ArteTvIE(InfoExtractor): mobj = re.match(self._EMISSION_URL, url) if mobj is not None: name = mobj.group('name') + lang = mobj.group('lang') # This is not a real id, it can be for example AJT for the news # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal video_id = mobj.group('id') - return self._extract_emission(url, video_id) + return self._extract_emission(url, video_id, lang) mobj = re.match(self._VIDEOS_URL, url) if mobj is not None: @@ -72,10 +73,9 @@ class ArteTvIE(InfoExtractor): # self.extractLiveStream(url) # return - def _extract_emission(self, url, video_id): + def _extract_emission(self, url, video_id, lang): """Extract from www.arte.tv/guide""" - webpage = self._download_webpage(url, video_id) - json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + json_url = 'http://org-www.arte.tv/papi/tvguide/videos/stream/player/F/%s_PLUS7-F/ALL/ALL.json' % video_id json_info = self._download_webpage(json_url, video_id, 'Downloading info json') self.report_extraction(video_id) @@ -91,6 +91,16 @@ class ArteTvIE(InfoExtractor): } formats = player_info['VSR'].values() + def _match_lang(f): + # Return true if that format is in the language of the url + if lang == 'fr': + l = 'F' + elif lang == 'de': + l = 'A' + regexes = [r'VO?%s' % l, r'V%s-ST.' 
% l] + return any(re.match(r, f['versionCode']) for r in regexes) + # Some formats may not be in the same language as the url + formats = filter(_match_lang, formats) # We order the formats by quality formats = sorted(formats, key=lambda f: int(f['height'])) # Pick the best quality From 5d2eac9ebafb93065899137cd07d1704a376c154 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 3 Jul 2013 16:36:36 +0200 Subject: [PATCH 12/12] [auengine] Add tests (Fixes #985) --- youtube_dl/extractor/auengine.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 3b4ade3bf..0febbff4f 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -8,6 +8,14 @@ from ..utils import ( ) class AUEngineIE(InfoExtractor): + _TEST = { + u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', + u'file': u'lfvlytY6.mp4', + u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f', + u'info_dict': { + u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" + } + } _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' def _real_extract(self, url):