Merge branch 'master' into use-other-downloaders

Rogério Brito 2013-07-01 23:17:30 -03:00
commit 227607e7cc
14 changed files with 171 additions and 39 deletions

View File

@@ -8,7 +8,7 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
from youtube_dl.utils import *
from helper import FakeYDL
@@ -88,5 +88,11 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
        self.assertEqual(len(result['entries']), 2)

    def test_youtube_show(self):
        dl = FakeYDL()
        ie = YoutubeShowIE(dl)
        result = ie.extract('http://www.youtube.com/show/airdisasters')
        self.assertTrue(len(result) >= 4)

if __name__ == '__main__':
    unittest.main()

View File

@@ -35,6 +35,7 @@ import codecs
import getpass
import optparse
import os
import random
import re
import shlex
import socket
@@ -118,6 +119,7 @@ def parseOpts(overrideArguments=None):
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    downloader = optparse.OptionGroup(parser, 'Download Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@@ -130,15 +132,6 @@ def parseOpts(overrideArguments=None):
        action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
        action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
        dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
        dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
    general.add_option('--buffer-size',
        dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
    general.add_option('--no-resize-buffer',
        action='store_true', dest='noresizebuffer',
        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    general.add_option('--dump-user-agent',
        action='store_true', dest='dump_user_agent',
        help='display the current browser identification', default=False)
@@ -150,9 +143,12 @@ def parseOpts(overrideArguments=None):
    general.add_option('--list-extractors',
        action='store_true', dest='list_extractors',
        help='List all supported extractors and the URLs they would handle', default=False)
    general.add_option('--extractor-descriptions',
        action='store_true', dest='list_extractor_descriptions',
        help='Output descriptions of all supported extractors', default=False)
    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    selection.add_option('--playlist-start',
        dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
@@ -211,6 +207,17 @@ def parseOpts(overrideArguments=None):
        action='store', dest='subtitleslang', metavar='LANG',
        help='language of the subtitles to download (optional) use IETF language tags like \'en\'')

    downloader.add_option('-r', '--rate-limit',
        dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
    downloader.add_option('-R', '--retries',
        dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
    downloader.add_option('--buffer-size',
        dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
    downloader.add_option('--no-resize-buffer',
        action='store_true', dest='noresizebuffer',
        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    verbosity.add_option('-q', '--quiet',
        action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
@@ -317,6 +324,7 @@ def parseOpts(overrideArguments=None):
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(downloader)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
@@ -420,13 +428,25 @@ def _real_main(argv=None):
    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                compat_print(u' ' + mu)
        sys.exit(0)

    if opts.list_extractor_descriptions:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
                _COUNTS = (u'', u'5', u'10', u'all')
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
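For reference, the new --extractor-descriptions branch reduces to the loop sketched below. This is a standalone illustration, not code from the commit; the Fake* classes are hypothetical stand-ins for real extractor classes.

# Standalone sketch of the --extractor-descriptions output logic added above.
# The two Fake* classes are hypothetical stand-ins for real extractors.
import random

class FakeYoutubeIE(object):
    _WORKING = True
    IE_NAME = u'youtube'
    IE_DESC = u'YouTube.com'

class FakeYoutubeSearchIE(object):
    _WORKING = True
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    SEARCH_KEY = u'ytsearch'

for ie in (FakeYoutubeIE(), FakeYoutubeSearchIE()):
    if not ie._WORKING:
        continue
    desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
    if hasattr(ie, 'SEARCH_KEY'):
        # search extractors get a randomized example query appended
        desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY,
                                             random.choice((u'', u'5', u'10', u'all')),
                                             random.choice((u'cute kittens', u'falling cat')))
    print(desc)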

View File

@@ -25,6 +25,7 @@ from .howcast import HowcastIE
from .hypem import HypemIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .keek import KeekIE
@@ -48,6 +49,7 @@ from .steam import SteamIE
from .teamcoco import TeamcocoIE
from .ted import TEDIE
from .tf1 import TF1IE
from .traileraddict import TrailerAddictIE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
@@ -66,7 +68,7 @@ from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
from .youporn import YouPornIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
from .zdf import ZDFIE

View File

@@ -12,8 +12,7 @@ from ..utils import (
class ComedyCentralIE(InfoExtractor):
    """Information extractor for The Daily Show and Colbert Report """
    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day

View File

@@ -263,3 +263,7 @@ class SearchInfoExtractor(InfoExtractor):
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by sublclasses")

    @property
    def SEARCH_KEY(self):
        return self._SEARCH_KEY

View File

@@ -11,8 +11,7 @@ from ..utils import (
)

class GenericIE(InfoExtractor):
    """Generic last-resort information extractor."""
    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = u'generic'
    _TEST = {
@@ -144,7 +143,7 @@ class GenericIE(InfoExtractor):
        # Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'video title', default=u'video')
            webpage, u'video title', default=u'video', flags=re.DOTALL)
        # video uploader is domain name
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
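The only functional change to GenericIE is the flags=re.DOTALL on the title regex: without it, '.' stops at newlines, so a <title> element that wraps across lines is missed. A minimal illustration (the page fragment is made up):

import re

webpage = '<title>Some video\n| Site Name</title>'  # hypothetical multi-line title
print(re.search(r'<title>(.*)</title>', webpage))                      # None: '.' does not cross the newline
print(re.search(r'<title>(.*)</title>', webpage, re.DOTALL).group(1))  # 'Some video\n| Site Name'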

View File

@@ -10,8 +10,7 @@ from ..utils import (
class GooglePlusIE(InfoExtractor):
    """Information extractor for plus.google.com."""
    IE_DESC = u'Google Plus'
    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'
    _TEST = {

View File

@@ -8,7 +8,7 @@ from ..utils import (
class GoogleSearchIE(SearchInfoExtractor):
    """Information Extractor for Google Video search queries."""
    IE_DESC = u'Google Video search'
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'

View File

@@ -0,0 +1,42 @@
import re

from .common import InfoExtractor


class InstagramIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/#',
        u'file': u'aye83DjauH.mp4',
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq",
            u"title": u"Video by naomipq"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(
            r'<meta property="og:video" content="(.+?)"',
            webpage, u'video URL')
        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)" />',
            webpage, u'thumbnail URL', fatal=False)
        html_title = self._html_search_regex(
            r'<title>(.+?)</title>',
            webpage, u'title', flags=re.DOTALL)
        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
        uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
            webpage, u'uploader name', fatal=False)
        ext = 'mp4'

        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'uploader_id' : uploader_id
        }]

View File

@@ -16,10 +16,9 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor):
    """Information extractor for Stanford's Open ClassRoom"""
    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    IE_NAME = u'stanfordoc'
    IE_DESC = u'Stanford Open ClassRoom'
    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        u'file': u'PracticalUnix_intro-environment.mp4',

View File

@@ -0,0 +1,49 @@
import re

from .common import InfoExtractor


class TrailerAddictIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)'
    _TEST = {
        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        u'file': u'76184.mp4',
        u'md5': u'41365557f3c8c397d091da510e73ceb4',
        u'info_dict': {
            u"title": u"Prince Avalanche Trailer",
            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<title>(.+?)</title>',
                webpage, 'video title').replace(' - Trailer Addict','')
        view_count = self._search_regex(r'Views: (.+?)<br />',
                webpage, 'Views Count')
        description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
                webpage, 'video description')
        video_id = self._search_regex(r'<meta property="og:video" content="(.+?)" />',
                webpage, 'Video id').split('=')[1]
        info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")

        final_url = self._search_regex(r'&fileurl=(.+)',
                info_webpage, 'Download url').replace('%3F','?')
        thumbnail_url = self._search_regex(r'&image=(.+?)&',
                info_webpage, 'thumbnail url')
        ext = final_url.split('.')[-1].split('?')[0]

        return [{
            'id' : video_id,
            'url' : final_url,
            'ext' : ext,
            'title' : title,
            'thumbnail' : thumbnail_url,
            'description' : description,
            'view_count' : view_count,
        }]

View File

@@ -11,7 +11,7 @@ from ..utils import (
)

class YahooIE(InfoExtractor):
    """Information extractor for screen.yahoo.com."""
    IE_DESC = u'Yahoo screen'
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
    _TEST = {
        u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -88,8 +88,7 @@ class YahooIE(InfoExtractor):
        return info_dict

class YahooSearchIE(SearchInfoExtractor):
    """Information Extractor for Yahoo! Video search queries."""
    IE_DESC = u'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

View File

@@ -23,8 +23,7 @@ from ..utils import (
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                   # http(s):// (optional)
@@ -34,7 +33,7 @@ class YoutubeIE(InfoExtractor):
                         (?:                              # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)           # v/ or embed/ or e/
                             |(?:                         # or the v= param in all its forms
                                 (?:watch(?:_popup)?(?:\.php)?)?       # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)              # the params delimiter ? or # or #!
                                 (?:.*?&)?                # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
@@ -402,6 +401,9 @@ class YoutubeIE(InfoExtractor):
        return video_id

    def _real_extract(self, url):
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
@@ -583,7 +585,7 @@ class YoutubeIE(InfoExtractor):
            if req_format is None or req_format == 'best':
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == 'worst':
                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
            elif req_format in ('-1', 'all'):
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            else:
@@ -626,8 +628,7 @@ class YoutubeIE(InfoExtractor):
        return results

class YoutubePlaylistIE(InfoExtractor):
    """Information Extractor for YouTube playlists."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
@@ -694,8 +695,7 @@ class YoutubePlaylistIE(InfoExtractor):
class YoutubeChannelIE(InfoExtractor):
    """Information Extractor for YouTube channels."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
@@ -753,8 +753,7 @@ class YoutubeChannelIE(InfoExtractor):
class YoutubeUserIE(InfoExtractor):
    """Information Extractor for YouTube users."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
@@ -810,7 +809,7 @@ class YoutubeUserIE(InfoExtractor):
        return [self.playlist_result(url_results, playlist_title = username)]

class YoutubeSearchIE(SearchInfoExtractor):
    """Information Extractor for YouTube search queries."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
@@ -850,3 +849,18 @@ class YoutubeSearchIE(SearchInfoExtractor):
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)


class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
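The new YoutubeShowIE simply collects the per-season playlist links from the show page and hands each one off as a YoutubePlaylist result. The season discovery reduces to the regex below, shown here on a hypothetical HTML fragment:

import re

webpage = ('<a href="/playlist?list=PL0001">Season 1</a>'
           '<a href="/playlist?list=PL0002">Season 2</a>')  # hypothetical show page fragment
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
print(len(m_seasons))  # 2
print(['https://www.youtube.com' + m.group(1) for m in m_seasons])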

View File

@@ -44,7 +44,7 @@ def update_self(to_screen, verbose, filename):
    if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
        to_screen(u'It looks like you installed youtube-dl with pip, setup.py or a tarball. Please use that to update.')
        to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
        return

    # Check if there is a new version