diff --git a/README.md b/README.md index ceb85fef1..b246d3c53 100644 --- a/README.md +++ b/README.md @@ -18,19 +18,13 @@ which means you can modify it, redistribute it or use it however you like. --version print program version and exit -U, --update update this program to latest version -i, --ignore-errors continue on download errors - -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) - --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) - (default is 1024) - --no-resize-buffer do not automatically adjust the buffer size. By - default, the buffer size is automatically resized - from an initial value of SIZE. --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access is restricted to one domain --list-extractors List all supported extractors and the URLs they would handle + --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. @@ -50,6 +44,15 @@ which means you can modify it, redistribute it or use it however you like. --datebefore DATE download only videos uploaded before this date --dateafter DATE download only videos uploaded after this date +## Download Options: + -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m) + -R, --retries RETRIES number of retries (default is 10) + --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) + (default is 1024) + --no-resize-buffer do not automatically adjust the buffer size. By + default, the buffer size is automatically resized + from an initial value of SIZE. + ## Filesystem Options: -t, --title use title in file name (default) --id use only video ID in file name @@ -194,11 +197,11 @@ Examples: ### Can you please put the -b option back? -Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. ### I get HTTP error 402 when trying to download a video. What's this? -Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. ### I have downloaded a video but how can I play it? diff --git a/setup.py b/setup.py index 61435fcb7..3b6dc2d40 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,9 @@ except ImportError: from distutils.core import setup try: + # This will create an exe that needs Microsoft Visual C++ 2008 + # Redistributable Package import py2exe - """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package""" except ImportError: if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': print("Cannot import py2exe", file=sys.stderr) @@ -26,13 +27,15 @@ py2exe_options = { "dist_dir": '.', "dll_excludes": ['w9xpopen.exe'], } + py2exe_console = [{ "script": "./youtube_dl/__main__.py", "dest_base": "youtube-dl", }] + py2exe_params = { 'console': py2exe_console, - 'options': { "py2exe": py2exe_options }, + 'options': {"py2exe": py2exe_options}, 'zipfile': None } @@ -41,30 +44,34 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': else: params = { 'scripts': ['bin/youtube-dl'], - 'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo... - ('share/doc/youtube_dl', ['README.txt']), - ('share/man/man1/', ['youtube-dl.1'])] + 'data_files': [ # Installing system-wide would require sudo... + ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('share/doc/youtube_dl', ['README.txt']), + ('share/man/man1/', ['youtube-dl.1']) + ] } # Get the version from youtube_dl/version.py without importing the package -exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) setup( - name = 'youtube_dl', - version = __version__, - description = 'YouTube video downloader', - long_description = 'Small command-line program to download videos from YouTube.com and other video sites.', - url = 'https://github.com/rg3/youtube-dl', - author = 'Ricardo Garcia', - maintainer = 'Philipp Hagemeister', - maintainer_email = 'phihag@phihag.de', - packages = ['youtube_dl', 'youtube_dl.extractor'], + name='youtube_dl', + version=__version__, + description='YouTube video downloader', + long_description='Small command-line program to download videos from' + ' YouTube.com and other video sites.', + url='https://github.com/rg3/youtube-dl', + author='Ricardo Garcia', + maintainer='Philipp Hagemeister', + maintainer_email='phihag@phihag.de', + packages=['youtube_dl', 'youtube_dl.extractor'], # Provokes warning on most systems (why?!) - #test_suite = 'nose.collector', - #test_requires = ['nosetest'], + # test_suite = 'nose.collector', + # test_requires = ['nosetest'], - classifiers = [ + classifiers=[ "Topic :: Multimedia :: Video", "Development Status :: 5 - Production/Stable", "Environment :: Console", diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 8b1b71c67..fbfdd32c7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -586,14 +586,16 @@ def _real_main(argv=None): if opts.verbose: ydl.to_screen(u'[debug] youtube-dl version ' + __version__) try: - sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__))) + sp = subprocess.Popen( + ['git', 'rev-parse', '--short', 'HEAD'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__))) out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): ydl.to_screen(u'[debug] Git HEAD: ' + out) except: - pass + sys.exc_clear() ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 183274eb7..a030a28bb 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -16,7 +16,7 @@ class ArteTvIE(InfoExtractor): www.arte.tv/guide, the extraction process is different for each one. The videos expire in 7 days, so we can't add tests. """ - _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' + _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P.*?).html' _LIVE_URL = r'index-[0-9]+\.html$' @@ -57,10 +57,11 @@ class ArteTvIE(InfoExtractor): mobj = re.match(self._EMISSION_URL, url) if mobj is not None: name = mobj.group('name') + lang = mobj.group('lang') # This is not a real id, it can be for example AJT for the news # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal video_id = mobj.group('id') - return self._extract_emission(url, video_id) + return self._extract_emission(url, video_id, lang) mobj = re.match(self._VIDEOS_URL, url) if mobj is not None: @@ -72,10 +73,9 @@ class ArteTvIE(InfoExtractor): # self.extractLiveStream(url) # return - def _extract_emission(self, url, video_id): + def _extract_emission(self, url, video_id, lang): """Extract from www.arte.tv/guide""" - webpage = self._download_webpage(url, video_id) - json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + json_url = 'http://org-www.arte.tv/papi/tvguide/videos/stream/player/F/%s_PLUS7-F/ALL/ALL.json' % video_id json_info = self._download_webpage(json_url, video_id, 'Downloading info json') self.report_extraction(video_id) @@ -91,6 +91,16 @@ class ArteTvIE(InfoExtractor): } formats = player_info['VSR'].values() + def _match_lang(f): + # Return true if that format is in the language of the url + if lang == 'fr': + l = 'F' + elif lang == 'de': + l = 'A' + regexes = [r'VO?%s' % l, r'V%s-ST.' % l] + return any(re.match(r, f['versionCode']) for r in regexes) + # Some formats may not be in the same language as the url + formats = filter(_match_lang, formats) # We order the formats by quality formats = sorted(formats, key=lambda f: int(f['height'])) # Pick the best quality diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 3b4ade3bf..0febbff4f 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -8,6 +8,14 @@ from ..utils import ( ) class AUEngineIE(InfoExtractor): + _TEST = { + u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', + u'file': u'lfvlytY6.mp4', + u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f', + u'info_dict': { + u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" + } + } _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' def _real_extract(self, url): diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 37141e6a0..f7af65606 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -27,7 +27,7 @@ class BlipTVIE(InfoExtractor): _TEST = { u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', u'file': u'5779306.m4v', - u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe', + u'md5': u'80baf1ec5c3d2019037c1c707d676b9f', u'info_dict': { u"upload_date": u"20111205", u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", @@ -103,7 +103,12 @@ class BlipTVIE(InfoExtractor): data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') - video_url = data['media']['url'] + if 'additionalMedia' in data: + formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) + best_format = formats[-1] + video_url = best_format['url'] + else: + video_url = data['media']['url'] umobj = re.match(self._URL_EXT, video_url) if umobj is None: raise ValueError('Can not determine filename extension') diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 4681a6f79..1405b73f7 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -1,24 +1,34 @@ +# coding: utf-8 + import re +import json from .common import InfoExtractor class TudouIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)' + _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?' _TEST = { u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', - u'file': u'159447792.f4v', - u'md5': u'ad7c358a01541e926a1e413612c6b10a', + u'file': u'159448201.f4v', + u'md5': u'140a49ed444bd22f93330985d8475fcb', u'info_dict': { - u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526" + u"title": u"卡马乔国足开大脚长传冲吊集锦" } } + def _url_for_id(self, id, quality = None): + info_url = "http://v2.tudou.com/f?id="+str(id) + if quality: + info_url += '&hd' + quality + webpage = self._download_webpage(info_url, id, "Opening the info webpage") + final_url = self._html_search_regex('>(.+?)',webpage, 'video url') + return final_url + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(2).replace('.html','') + video_id = mobj.group(2) webpage = self._download_webpage(url, video_id) - video_id = re.search('"k":(.+?),',webpage).group(1) title = re.search(",kw:\"(.+)\"",webpage) if title is None: title = re.search(",kw: \'(.+)\'",webpage) @@ -27,14 +37,27 @@ class TudouIE(InfoExtractor): if thumbnail_url is None: thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) thumbnail_url = thumbnail_url.group(1) - info_url = "http://v2.tudou.com/f?id="+str(video_id) - webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") - final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1) - ext = (final_url.split('?')[0]).split('.')[-1] - return [{ - 'id': video_id, - 'url': final_url, - 'ext': ext, - 'title': title, - 'thumbnail': thumbnail_url, - }] + + segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments') + segments = json.loads(segs_json) + # It looks like the keys are the arguments that have to be passed as + # the hd field in the request url, we pick the higher + quality = sorted(segments.keys())[-1] + parts = segments[quality] + result = [] + len_parts = len(parts) + if len_parts > 1: + self.to_screen(u'%s: found %s parts' % (video_id, len_parts)) + for part in parts: + part_id = part['k'] + final_url = self._url_for_id(part_id, quality) + ext = (final_url.split('?')[0]).split('.')[-1] + part_info = {'id': part_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + } + result.append(part_info) + + return result diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b526e0c53..14a8bd6ea 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -441,7 +441,7 @@ class YoutubeIE(InfoExtractor): break if 'token' not in video_info: if 'reason' in video_info: - raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0]) + raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) else: raise ExtractorError(u'"token" parameter not in video info for unknown reason') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f9e7ce956..9137a4f70 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -470,10 +470,14 @@ def make_HTTPS_handler(opts): class ExtractorError(Exception): """Error during info extraction.""" - def __init__(self, msg, tb=None): - """ tb, if given, is the original traceback (so that it can be printed out). """ + def __init__(self, msg, tb=None, expected=False): + """ tb, if given, is the original traceback (so that it can be printed out). + If expected is set, this is a normal error message and most likely not a bug in youtube-dl. + """ - if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + expected = True + if not expected: msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' super(ExtractorError, self).__init__(msg) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7bba3a883..bc4ad90be 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.06.34.4' +__version__ = '2013.07.02'