diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 4486b7eb0..dd9e292b0 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE +from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE from youtube_dl.utils import * from helper import FakeYDL @@ -88,5 +88,11 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] self.assertEqual(len(result['entries']), 2) + def test_youtube_show(self): + dl = FakeYDL() + ie = YoutubeShowIE(dl) + result = ie.extract('http://www.youtube.com/show/airdisasters') + self.assertTrue(len(result) >= 4) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 77432a9a6..8b1b71c67 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -35,6 +35,7 @@ import codecs import getpass import optparse import os +import random import re import shlex import socket @@ -118,6 +119,7 @@ def parseOpts(overrideArguments=None): selection = optparse.OptionGroup(parser, 'Video Selection') authentication = optparse.OptionGroup(parser, 'Authentication Options') video_format = optparse.OptionGroup(parser, 'Video Format Options') + downloader = optparse.OptionGroup(parser, 'Download Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -130,15 +132,6 @@ def parseOpts(overrideArguments=None): action='store_true', dest='update_self', help='update this program to latest version') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - general.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') - general.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) - general.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") - general.add_option('--no-resize-buffer', - action='store_true', dest='noresizebuffer', - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -150,9 +143,12 @@ def parseOpts(overrideArguments=None): general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) + general.add_option('--extractor-descriptions', + action='store_true', dest='list_extractor_descriptions', + help='Output descriptions of all supported extractors', default=False) general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') - general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + selection.add_option('--playlist-start', dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1) @@ -211,6 +207,17 @@ def parseOpts(overrideArguments=None): action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') + downloader.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') + downloader.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) + downloader.add_option('--buffer-size', + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") + downloader.add_option('--no-resize-buffer', + action='store_true', dest='noresizebuffer', + help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) + downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option('-s', '--simulate', @@ -317,6 +324,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(general) parser.add_option_group(selection) + parser.add_option_group(downloader) parser.add_option_group(filesystem) parser.add_option_group(verbosity) parser.add_option_group(video_format) @@ -420,13 +428,25 @@ def _real_main(argv=None): extractors = gen_extractors() if opts.list_extractors: - for ie in extractors: + for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) matchedUrls = [url for url in all_urls if ie.suitable(url)] all_urls = [url for url in all_urls if url not in matchedUrls] for mu in matchedUrls: compat_print(u' ' + mu) sys.exit(0) + if opts.list_extractor_descriptions: + for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + if not ie._WORKING: + continue + desc = getattr(ie, 'IE_DESC', ie.IE_NAME) + if hasattr(ie, 'SEARCH_KEY'): + _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') + _COUNTS = (u'', u'5', u'10', u'all') + desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) + compat_print(desc) + sys.exit(0) + # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 98733e394..41efc57d4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -25,6 +25,7 @@ from .howcast import HowcastIE from .hypem import HypemIE from .ina import InaIE from .infoq import InfoQIE +from .instagram import InstagramIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .keek import KeekIE @@ -48,6 +49,7 @@ from .steam import SteamIE from .teamcoco import TeamcocoIE from .ted import TEDIE from .tf1 import TF1IE +from .traileraddict import TrailerAddictIE from .tudou import TudouIE from .tumblr import TumblrIE from .tutv import TutvIE @@ -66,7 +68,7 @@ from .yahoo import YahooIE, YahooSearchIE from .youjizz import YouJizzIE from .youku import YoukuIE from .youporn import YouPornIE -from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE +from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE from .zdf import ZDFIE diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index d9337f8a1..93d9e3d5e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,8 +12,7 @@ from ..utils import ( class ComedyCentralIE(InfoExtractor): - """Information extractor for The Daily Show and Colbert Report """ - + IE_DESC = u'The Daily Show / Colbert Report' # urls can be abbreviations like :thedailyshow or :colbert # urls for episodes like: # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5c6fd7945..655836ff6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -263,3 +263,7 @@ class SearchInfoExtractor(InfoExtractor): def _get_n_results(self, query, n): """Get a specified number of results for a query""" raise NotImplementedError("This method must be implemented by sublclasses") + + @property + def SEARCH_KEY(self): + return self._SEARCH_KEY diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 019bbe6e9..20bc53330 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,8 +11,7 @@ from ..utils import ( ) class GenericIE(InfoExtractor): - """Generic last-resort information extractor.""" - + IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' IE_NAME = u'generic' _TEST = { @@ -144,7 +143,7 @@ class GenericIE(InfoExtractor): # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical video_title = self._html_search_regex(r'(.*)', - webpage, u'video title', default=u'video') + webpage, u'video title', default=u'video', flags=re.DOTALL) # video uploader is domain name video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index a8f171afd..9f7fc19a4 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -10,8 +10,7 @@ from ..utils import ( class GooglePlusIE(InfoExtractor): - """Information extractor for plus.google.com.""" - + IE_DESC = u'Google Plus' _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' IE_NAME = u'plus.google' _TEST = { diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 21c240e51..f9c88e9b5 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -8,7 +8,7 @@ from ..utils import ( class GoogleSearchIE(SearchInfoExtractor): - """Information Extractor for Google Video search queries.""" + IE_DESC = u'Google Video search' _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"' _MAX_RESULTS = 1000 IE_NAME = u'video.google:search' diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py new file mode 100644 index 000000000..6ae704efd --- /dev/null +++ b/youtube_dl/extractor/instagram.py @@ -0,0 +1,42 @@ +import re + +from .common import InfoExtractor + +class InstagramIE(InfoExtractor): + _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/' + _TEST = { + u'url': u'http://instagram.com/p/aye83DjauH/#', + u'file': u'aye83DjauH.mp4', + u'md5': u'0d2da106a9d2631273e192b372806516', + u'info_dict': { + u"uploader_id": u"naomipq", + u"title": u"Video by naomipq" + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + video_url = self._html_search_regex( + r'', + webpage, u'thumbnail URL', fatal=False) + html_title = self._html_search_regex( + r'(.+?)', + webpage, u'title', flags=re.DOTALL) + title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip() + uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram', + webpage, u'uploader name', fatal=False) + ext = 'mp4' + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + 'uploader_id' : uploader_id + }] diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index 25a0d09f7..b27838bf9 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -16,10 +16,9 @@ from ..utils import ( class StanfordOpenClassroomIE(InfoExtractor): - """Information extractor for Stanford's Open ClassRoom""" - - _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P