Merge branch 'master' into use-other-downloaders

2013-07-04 09:57:02 -03:00 · 2013-07-04 09:57:02 -03:00 · a15f3f40ed
commit a15f3f40ed
parent 227607e7cc 6d6d286539
10 changed files with 121 additions and 59 deletions
--- a/README.md
+++ b/README.md
@@ -18,19 +18,13 @@ which means you can modify it, redistribute it or use it however you like.
    --version                  print program version and exit
    -U, --update               update this program to latest version
    -i, --ignore-errors        continue on download errors
-    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
-    -R, --retries RETRIES      number of retries (default is 10)
-    --buffer-size SIZE         size of download buffer (e.g. 1024 or 16k)
-                               (default is 1024)
-    --no-resize-buffer         do not automatically adjust the buffer size. By
-                               default, the buffer size is automatically resized
-                               from an initial value of SIZE.
    --dump-user-agent          display the current browser identification
    --user-agent UA            specify a custom user agent
    --referer REF              specify a custom referer, use if the video access
                               is restricted to one domain
    --list-extractors          List all supported extractors and the URLs they
                               would handle
+    --extractor-descriptions   Output descriptions of all supported extractors
    --proxy URL                Use the specified HTTP/HTTPS proxy
    --no-check-certificate     Suppress HTTPS certificate validation.

@@ -50,6 +44,15 @@ which means you can modify it, redistribute it or use it however you like.
    --datebefore DATE          download only videos uploaded before this date
    --dateafter DATE           download only videos uploaded after this date

+## Download Options:
+    -r, --rate-limit LIMIT     maximum download rate (e.g. 50k or 44.6m)
+    -R, --retries RETRIES      number of retries (default is 10)
+    --buffer-size SIZE         size of download buffer (e.g. 1024 or 16k)
+                               (default is 1024)
+    --no-resize-buffer         do not automatically adjust the buffer size. By
+                               default, the buffer size is automatically resized
+                               from an initial value of SIZE.
+
 ## Filesystem Options:
    -t, --title                use title in file name (default)
    --id                       use only video ID in file name
@@ -194,11 +197,11 @@ Examples:

 ### Can you please put the -b option back?

-Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it.
+Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it.

 ### I get HTTP error 402 when trying to download a video. What's this?

-Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
+Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.

 ### I have downloaded a video but how can I play it?

--- a/setup.py
+++ b/setup.py
@@ -12,8 +12,9 @@ except ImportError:
    from distutils.core import setup

 try:
+    # This will create an exe that needs Microsoft Visual C++ 2008
+    # Redistributable Package
    import py2exe
-    """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""
 except ImportError:
    if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
        print("Cannot import py2exe", file=sys.stderr)
@@ -26,13 +27,15 @@ py2exe_options = {
    "dist_dir": '.',
    "dll_excludes": ['w9xpopen.exe'],
 }
+
 py2exe_console = [{
    "script": "./youtube_dl/__main__.py",
    "dest_base": "youtube-dl",
 }]
+
 py2exe_params = {
    'console': py2exe_console,
-    'options': { "py2exe": py2exe_options },
+    'options': {"py2exe": py2exe_options},
    'zipfile': None
 }

@@ -41,30 +44,34 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
 else:
    params = {
        'scripts': ['bin/youtube-dl'],
-        'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo...
-                       ('share/doc/youtube_dl', ['README.txt']),
-                       ('share/man/man1/', ['youtube-dl.1'])]
+        'data_files': [  # Installing system-wide would require sudo...
+            ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+            ('share/doc/youtube_dl', ['README.txt']),
+            ('share/man/man1/', ['youtube-dl.1'])
+        ]
    }

 # Get the version from youtube_dl/version.py without importing the package
-exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec'))
+exec(compile(open('youtube_dl/version.py').read(),
+             'youtube_dl/version.py', 'exec'))

 setup(
-    name = 'youtube_dl',
-    version = __version__,
-    description = 'YouTube video downloader',
-    long_description = 'Small command-line program to download videos from YouTube.com and other video sites.',
-    url = 'https://github.com/rg3/youtube-dl',
-    author = 'Ricardo Garcia',
-    maintainer = 'Philipp Hagemeister',
-    maintainer_email = 'phihag@phihag.de',
-    packages = ['youtube_dl', 'youtube_dl.extractor'],
+    name='youtube_dl',
+    version=__version__,
+    description='YouTube video downloader',
+    long_description='Small command-line program to download videos from'
+    ' YouTube.com and other video sites.',
+    url='https://github.com/rg3/youtube-dl',
+    author='Ricardo Garcia',
+    maintainer='Philipp Hagemeister',
+    maintainer_email='phihag@phihag.de',
+    packages=['youtube_dl', 'youtube_dl.extractor'],

    # Provokes warning on most systems (why?!)
-    #test_suite = 'nose.collector',
-    #test_requires = ['nosetest'],
+    # test_suite = 'nose.collector',
+    # test_requires = ['nosetest'],

-    classifiers = [
+    classifiers=[
        "Topic :: Multimedia :: Video",
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -586,14 +586,16 @@ def _real_main(argv=None):
    if opts.verbose:
        ydl.to_screen(u'[debug] youtube-dl version ' + __version__)
        try:
-            sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                                  cwd=os.path.dirname(os.path.abspath(__file__)))
+            sp = subprocess.Popen(
+                ['git', 'rev-parse', '--short', 'HEAD'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                ydl.to_screen(u'[debug] Git HEAD: ' + out)
        except:
-            pass
+            sys.exc_clear()
        ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
        ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -16,7 +16,7 @@ class ArteTvIE(InfoExtractor):
    www.arte.tv/guide, the extraction process is different for each one.
    The videos expire in 7 days, so we can't add tests.
    """
-    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
    _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html'
    _LIVE_URL = r'index-[0-9]+\.html$'

@@ -57,10 +57,11 @@ class ArteTvIE(InfoExtractor):
        mobj = re.match(self._EMISSION_URL, url)
        if mobj is not None:
            name = mobj.group('name')
+            lang = mobj.group('lang')
            # This is not a real id, it can be for example AJT for the news
            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
            video_id = mobj.group('id')
-            return self._extract_emission(url, video_id)
+            return self._extract_emission(url, video_id, lang)

        mobj = re.match(self._VIDEOS_URL, url)
        if mobj is not None:
@@ -72,10 +73,9 @@ class ArteTvIE(InfoExtractor):
            # self.extractLiveStream(url)
            # return

-    def _extract_emission(self, url, video_id):
+    def _extract_emission(self, url, video_id, lang):
        """Extract from www.arte.tv/guide"""
-        webpage = self._download_webpage(url, video_id)
-        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+        json_url = 'http://org-www.arte.tv/papi/tvguide/videos/stream/player/F/%s_PLUS7-F/ALL/ALL.json' % video_id

        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
        self.report_extraction(video_id)
@@ -91,6 +91,16 @@ class ArteTvIE(InfoExtractor):
                     }

        formats = player_info['VSR'].values()
+        def _match_lang(f):
+            # Return true if that format is in the language of the url
+            if lang == 'fr':
+                l = 'F'
+            elif lang == 'de':
+                l = 'A'
+            regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
+            return any(re.match(r, f['versionCode']) for r in regexes)
+        # Some formats may not be in the same language as the url
+        formats = filter(_match_lang, formats)
        # We order the formats by quality
        formats = sorted(formats, key=lambda f: int(f['height']))
        # Pick the best quality
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@@ -8,6 +8,14 @@ from ..utils import (
 )

 class AUEngineIE(InfoExtractor):
+    _TEST = {
+        u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
+        u'file': u'lfvlytY6.mp4',
+        u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f',
+        u'info_dict': {
+            u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
+        }
+    }
    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -27,7 +27,7 @@ class BlipTVIE(InfoExtractor):
    _TEST = {
        u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
        u'file': u'5779306.m4v',
-        u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe',
+        u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
        u'info_dict': {
            u"upload_date": u"20111205", 
            u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", 
@@ -103,7 +103,12 @@ class BlipTVIE(InfoExtractor):
                    data = json_data

                upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
-                video_url = data['media']['url']
+                if 'additionalMedia' in data:
+                    formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
+                    best_format = formats[-1]
+                    video_url = best_format['url']
+                else:
+                    video_url = data['media']['url']
                umobj = re.match(self._URL_EXT, video_url)
                if umobj is None:
                    raise ValueError('Can not determine filename extension')
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -1,24 +1,34 @@
+# coding: utf-8
+
 import re
+import json

 from .common import InfoExtractor


 class TudouIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
+    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
    _TEST = {
        u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
-        u'file': u'159447792.f4v',
-        u'md5': u'ad7c358a01541e926a1e413612c6b10a',
+        u'file': u'159448201.f4v',
+        u'md5': u'140a49ed444bd22f93330985d8475fcb',
        u'info_dict': {
-            u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526"
+            u"title": u"卡马乔国足开大脚长传冲吊集锦"
        }
    }

+    def _url_for_id(self, id, quality = None):
+        info_url = "http://v2.tudou.com/f?id="+str(id)
+        if quality:
+            info_url += '&hd' + quality
+        webpage = self._download_webpage(info_url, id, "Opening the info webpage")
+        final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url')
+        return final_url
+
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(2).replace('.html','')
+        video_id = mobj.group(2)
        webpage = self._download_webpage(url, video_id)
-        video_id = re.search('"k":(.+?),',webpage).group(1)
        title = re.search(",kw:\"(.+)\"",webpage)
        if title is None:
            title = re.search(",kw: \'(.+)\'",webpage)
@@ -27,14 +37,27 @@ class TudouIE(InfoExtractor):
        if thumbnail_url is None:
            thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
        thumbnail_url = thumbnail_url.group(1)
-        info_url = "http://v2.tudou.com/f?id="+str(video_id)
-        webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
-        final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
-        ext = (final_url.split('?')[0]).split('.')[-1]
-        return [{
-            'id':        video_id,
-            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
-        }]
+
+        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
+        segments = json.loads(segs_json)
+        # It looks like the keys are the arguments that have to be passed as
+        # the hd field in the request url, we pick the higher
+        quality = sorted(segments.keys())[-1]
+        parts = segments[quality]
+        result = []
+        len_parts = len(parts)
+        if len_parts > 1:
+            self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
+        for part in parts:
+            part_id = part['k']
+            final_url = self._url_for_id(part_id, quality)
+            ext = (final_url.split('?')[0]).split('.')[-1]
+            part_info = {'id': part_id,
+                          'url': final_url,
+                          'ext': ext,
+                          'title': title,
+                          'thumbnail': thumbnail_url,
+                          }
+            result.append(part_info)
+
+        return result
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -441,7 +441,7 @@ class YoutubeIE(InfoExtractor):
                break
        if 'token' not in video_info:
            if 'reason' in video_info:
-                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0])
+                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
            else:
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -470,10 +470,14 @@ def make_HTTPS_handler(opts):

 class ExtractorError(Exception):
    """Error during info extraction."""
-    def __init__(self, msg, tb=None):
-        """ tb, if given, is the original traceback (so that it can be printed out). """
+    def __init__(self, msg, tb=None, expected=False):
+        """ tb, if given, is the original traceback (so that it can be printed out).
+        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
+        """

-        if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+            expected = True
+        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        super(ExtractorError, self).__init__(msg)

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.06.34.4'
+__version__ = '2013.07.02'