Merge branch 'master' into use-other-downloaders

2013-07-01 23:17:30 -03:00 · 2013-07-01 23:17:30 -03:00 · 227607e7cc
commit 227607e7cc
parent 6813ee5252 59fc531f78
14 changed files with 171 additions and 39 deletions
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
+from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
 from youtube_dl.utils import *

 from helper import FakeYDL
@ -88,5 +88,11 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
        self.assertEqual(len(result['entries']), 2)

+    def test_youtube_show(self):
+        dl = FakeYDL()
+        ie = YoutubeShowIE(dl)
+        result = ie.extract('http://www.youtube.com/show/airdisasters')
+        self.assertTrue(len(result) >= 4)
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -35,6 +35,7 @@ import codecs
 import getpass
 import optparse
 import os
+import random
 import re
 import shlex
 import socket
@ -118,6 +119,7 @@ def parseOpts(overrideArguments=None):
    selection      = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format   = optparse.OptionGroup(parser, 'Video Format Options')
+    downloader     = optparse.OptionGroup(parser, 'Download Options')
    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@ -130,15 +132,6 @@ def parseOpts(overrideArguments=None):
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
-    general.add_option('-r', '--rate-limit',
-            dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
-    general.add_option('-R', '--retries',
-            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
-    general.add_option('--buffer-size',
-            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
-    general.add_option('--no-resize-buffer',
-            action='store_true', dest='noresizebuffer',
-            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
@ -150,9 +143,12 @@ def parseOpts(overrideArguments=None):
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)
+    general.add_option('--extractor-descriptions',
+            action='store_true', dest='list_extractor_descriptions',
+            help='Output descriptions of all supported extractors', default=False)
    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
-    general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
@ -211,6 +207,17 @@ def parseOpts(overrideArguments=None):
            action='store', dest='subtitleslang', metavar='LANG',
            help='language of the subtitles to download (optional) use IETF language tags like \'en\'')

+    downloader.add_option('-r', '--rate-limit',
+            dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
+    downloader.add_option('-R', '--retries',
+            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
+    downloader.add_option('--buffer-size',
+            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
+    downloader.add_option('--no-resize-buffer',
+            action='store_true', dest='noresizebuffer',
+            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
+    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+
    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
@ -317,6 +324,7 @@ def parseOpts(overrideArguments=None):

    parser.add_option_group(general)
    parser.add_option_group(selection)
+    parser.add_option_group(downloader)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
@ -420,13 +428,25 @@ def _real_main(argv=None):
    extractors = gen_extractors()

    if opts.list_extractors:
-        for ie in extractors:
+        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                compat_print(u'  ' + mu)
        sys.exit(0)
+    if opts.list_extractor_descriptions:
+        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+            if not ie._WORKING:
+                continue
+            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+            if hasattr(ie, 'SEARCH_KEY'):
+                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
+                _COUNTS = (u'', u'5', u'10', u'all')
+                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
+            compat_print(desc)
+        sys.exit(0)
+

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -25,6 +25,7 @@ from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ina import InaIE
 from .infoq import InfoQIE
+from .instagram import InstagramIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .keek import KeekIE
@ -48,6 +49,7 @@ from .steam import SteamIE
 from .teamcoco import TeamcocoIE
 from .ted import TEDIE
 from .tf1 import TF1IE
+from .traileraddict import TrailerAddictIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
@ -66,7 +68,7 @@ from .yahoo import YahooIE, YahooSearchIE
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
 from .youporn import YouPornIE
-from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
+from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
 from .zdf import ZDFIE


--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -12,8 +12,7 @@ from ..utils import (


 class ComedyCentralIE(InfoExtractor):
-    """Information extractor for The Daily Show and Colbert Report """
-
+    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -263,3 +263,7 @@ class SearchInfoExtractor(InfoExtractor):
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by sublclasses")
+
+    @property
+    def SEARCH_KEY(self):
+        return self._SEARCH_KEY
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -11,8 +11,7 @@ from ..utils import (
 )

 class GenericIE(InfoExtractor):
-    """Generic last-resort information extractor."""
-
+    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = u'generic'
    _TEST = {
@ -144,7 +143,7 @@ class GenericIE(InfoExtractor):
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(r'<title>(.*)</title>',
-            webpage, u'video title', default=u'video')
+            webpage, u'video title', default=u'video', flags=re.DOTALL)

        # video uploader is domain name
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@ -10,8 +10,7 @@ from ..utils import (


 class GooglePlusIE(InfoExtractor):
-    """Information extractor for plus.google.com."""
-
+    IE_DESC = u'Google Plus'
    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'
    _TEST = {
--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dl/extractor/googlesearch.py
@ -8,7 +8,7 @@ from ..utils import (


 class GoogleSearchIE(SearchInfoExtractor):
-    """Information Extractor for Google Video search queries."""
+    IE_DESC = u'Google Video search'
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -0,0 +1,42 @@
+import re
+
+from .common import InfoExtractor
+
+class InstagramIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
+    _TEST = {
+        u'url': u'http://instagram.com/p/aye83DjauH/#',
+        u'file': u'aye83DjauH.mp4',
+        u'md5': u'0d2da106a9d2631273e192b372806516',
+        u'info_dict': {
+            u"uploader_id": u"naomipq", 
+            u"title": u"Video by naomipq"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'<meta property="og:video" content="(.+?)"',
+            webpage, u'video URL')
+        thumbnail_url = self._html_search_regex(
+            r'<meta property="og:image" content="(.+?)" />',
+            webpage, u'thumbnail URL', fatal=False)
+        html_title = self._html_search_regex(
+            r'<title>(.+?)</title>',
+            webpage, u'title', flags=re.DOTALL)
+        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
+        uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
+            webpage, u'uploader name', fatal=False)
+        ext = 'mp4'
+
+        return [{
+            'id':        video_id,
+            'url':       video_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+            'uploader_id' : uploader_id
+        }]
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dl/extractor/stanfordoc.py
@ -16,10 +16,9 @@ from ..utils import (


 class StanfordOpenClassroomIE(InfoExtractor):
-    """Information extractor for Stanford's Open ClassRoom"""
-
-    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    IE_NAME = u'stanfordoc'
+    IE_DESC = u'Stanford Open ClassRoom'
+    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        u'file': u'PracticalUnix_intro-environment.mp4',
--- a/youtube_dl/extractor/traileraddict.py
+++ b/youtube_dl/extractor/traileraddict.py
@ -0,0 +1,49 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TrailerAddictIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)'
+    _TEST = {
+        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
+        u'file': u'76184.mp4',
+        u'md5': u'41365557f3c8c397d091da510e73ceb4',
+        u'info_dict': {
+            u"title": u"Prince Avalanche Trailer",
+            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        
+        title = self._search_regex(r'<title>(.+?)</title>',
+                webpage, 'video title').replace(' - Trailer Addict','')
+        view_count = self._search_regex(r'Views: (.+?)<br />',
+                webpage, 'Views Count')
+        description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
+                webpage, 'video description')
+        video_id = self._search_regex(r'<meta property="og:video" content="(.+?)" />',
+                webpage, 'Video id').split('=')[1]
+        
+        info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
+        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
+        
+        final_url = self._search_regex(r'&fileurl=(.+)',
+                info_webpage, 'Download url').replace('%3F','?')
+        thumbnail_url = self._search_regex(r'&image=(.+?)&',
+                info_webpage, 'thumbnail url')
+        ext = final_url.split('.')[-1].split('?')[0]
+        
+        return [{
+            'id'          : video_id,
+            'url'         : final_url,
+            'ext'         : ext,
+            'title'       : title,
+            'thumbnail'   : thumbnail_url,
+            'description' : description,
+            'view_count'  : view_count,
+        }]
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@ -11,7 +11,7 @@ from ..utils import (
 )

 class YahooIE(InfoExtractor):
-    """Information extractor for screen.yahoo.com."""
+    IE_DESC = u'Yahoo screen'
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
    _TEST = {
        u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@ -88,8 +88,7 @@ class YahooIE(InfoExtractor):
        return info_dict

 class YahooSearchIE(SearchInfoExtractor):
-    """Information Extractor for Yahoo! Video search queries."""
-
+    IE_DESC = u'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -23,8 +23,7 @@ from ..utils import (


 class YoutubeIE(InfoExtractor):
-    """Information extractor for youtube.com."""
-
+    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
@ -34,7 +33,7 @@ class YoutubeIE(InfoExtractor):
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
-                                 (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
@ -402,6 +401,9 @@ class YoutubeIE(InfoExtractor):
        return video_id

    def _real_extract(self, url):
+        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
+            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like  youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply  youtube-dl BaW_jenozKc  ).')
+
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
@ -583,7 +585,7 @@ class YoutubeIE(InfoExtractor):
            if req_format is None or req_format == 'best':
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
+                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
            elif req_format in ('-1', 'all'):
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            else:
@ -626,8 +628,7 @@ class YoutubeIE(InfoExtractor):
        return results

 class YoutubePlaylistIE(InfoExtractor):
-    """Information Extractor for YouTube playlists."""
-
+    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
@ -694,8 +695,7 @@ class YoutubePlaylistIE(InfoExtractor):


 class YoutubeChannelIE(InfoExtractor):
-    """Information Extractor for YouTube channels."""
-
+    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
@ -753,8 +753,7 @@ class YoutubeChannelIE(InfoExtractor):


 class YoutubeUserIE(InfoExtractor):
-    """Information Extractor for YouTube users."""
-
+    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
@ -810,7 +809,7 @@ class YoutubeUserIE(InfoExtractor):
        return [self.playlist_result(url_results, playlist_title = username)]

 class YoutubeSearchIE(SearchInfoExtractor):
-    """Information Extractor for YouTube search queries."""
+    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
@ -850,3 +849,18 @@ class YoutubeSearchIE(SearchInfoExtractor):
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
+
+
+class YoutubeShowIE(InfoExtractor):
+    IE_DESC = u'YouTube.com (multi-season) shows'
+    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    IE_NAME = u'youtube:show'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_name = mobj.group(1)
+        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+        # There's one playlist for each season of the show
+        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
+        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
+        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@ -44,7 +44,7 @@ def update_self(to_screen, verbose, filename):


    if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
-        to_screen(u'It looks like you installed youtube-dl with pip, setup.py or a tarball. Please use that to update.')
+        to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
        return

    # Check if there is a new version