Merge branch 'master' into use-other-downloaders

Rogério Brito 2013-07-01 23:17:30 -03:00
commit 227607e7cc
14 changed files with 171 additions and 39 deletions

View File

@@ -8,7 +8,7 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
from youtube_dl.utils import *
from helper import FakeYDL
@@ -88,5 +88,11 @@ class TestYoutubeLists(unittest.TestCase):
        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
        self.assertEqual(len(result['entries']), 2)

    def test_youtube_show(self):
        dl = FakeYDL()
        ie = YoutubeShowIE(dl)
        result = ie.extract('http://www.youtube.com/show/airdisasters')
        self.assertTrue(len(result) >= 4)

if __name__ == '__main__':
    unittest.main()

View File

@@ -35,6 +35,7 @@ import codecs
import getpass
import optparse
import os
import random
import re
import shlex
import socket
@@ -118,6 +119,7 @@ def parseOpts(overrideArguments=None):
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    downloader = optparse.OptionGroup(parser, 'Download Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@@ -130,15 +132,6 @@ def parseOpts(overrideArguments=None):
        action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
        action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
        dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
        dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
    general.add_option('--buffer-size',
        dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
    general.add_option('--no-resize-buffer',
        action='store_true', dest='noresizebuffer',
        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    general.add_option('--dump-user-agent',
        action='store_true', dest='dump_user_agent',
        help='display the current browser identification', default=False)
@@ -150,9 +143,12 @@ def parseOpts(overrideArguments=None):
    general.add_option('--list-extractors',
        action='store_true', dest='list_extractors',
        help='List all supported extractors and the URLs they would handle', default=False)
    general.add_option('--extractor-descriptions',
        action='store_true', dest='list_extractor_descriptions',
        help='Output descriptions of all supported extractors', default=False)
    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
    general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    selection.add_option('--playlist-start',
        dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
@@ -211,6 +207,17 @@ def parseOpts(overrideArguments=None):
        action='store', dest='subtitleslang', metavar='LANG',
        help='language of the subtitles to download (optional) use IETF language tags like \'en\'')

    downloader.add_option('-r', '--rate-limit',
        dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
    downloader.add_option('-R', '--retries',
        dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
    downloader.add_option('--buffer-size',
        dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
    downloader.add_option('--no-resize-buffer',
        action='store_true', dest='noresizebuffer',
        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    verbosity.add_option('-q', '--quiet',
        action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
@@ -317,6 +324,7 @@ def parseOpts(overrideArguments=None):
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(downloader)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
@@ -420,13 +428,25 @@ def _real_main(argv=None):
    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                compat_print(u' ' + mu)
        sys.exit(0)

    if opts.list_extractor_descriptions:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
                _COUNTS = (u'', u'5', u'10', u'all')
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
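For reference, the new --extractor-descriptions branch reduces to the loop sketched below. This is a standalone illustration, not code from the commit; the Fake* classes are hypothetical stand-ins for real extractor classes.

# Standalone sketch of the --extractor-descriptions output logic added above.
# The two Fake* classes are hypothetical stand-ins for real extractors.
import random

class FakeYoutubeIE(object):
    _WORKING = True
    IE_NAME = u'youtube'
    IE_DESC = u'YouTube.com'

class FakeYoutubeSearchIE(object):
    _WORKING = True
    IE_NAME = u'youtube:search'
    IE_DESC = u'YouTube.com searches'
    SEARCH_KEY = u'ytsearch'

for ie in (FakeYoutubeIE(), FakeYoutubeSearchIE()):
    if not ie._WORKING:
        continue
    desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
    if hasattr(ie, 'SEARCH_KEY'):
        # search extractors get a randomized example query appended
        desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY,
                                             random.choice((u'', u'5', u'10', u'all')),
                                             random.choice((u'cute kittens', u'falling cat')))
    print(desc)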

View File

@@ -25,6 +25,7 @@ from .howcast import HowcastIE
from .hypem import HypemIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .keek import KeekIE
@@ -48,6 +49,7 @@ from .steam import SteamIE
from .teamcoco import TeamcocoIE
from .ted import TEDIE
from .tf1 import TF1IE
from .traileraddict import TrailerAddictIE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
@@ -66,7 +68,7 @@ from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
from .youporn import YouPornIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
from .zdf import ZDFIE

View File

@@ -12,8 +12,7 @@ from ..utils import (
class ComedyCentralIE(InfoExtractor):
    """Information extractor for The Daily Show and Colbert Report """
    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day

View File

@@ -263,3 +263,7 @@ class SearchInfoExtractor(InfoExtractor):
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by sublclasses")

    @property
    def SEARCH_KEY(self):
        return self._SEARCH_KEY

View File

@@ -11,8 +11,7 @@ from ..utils import (
)

class GenericIE(InfoExtractor):
    """Generic last-resort information extractor."""
    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = u'generic'
    _TEST = {
@@ -144,7 +143,7 @@ class GenericIE(InfoExtractor):
        # Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'video title', default=u'video')
            webpage, u'video title', default=u'video', flags=re.DOTALL)
        # video uploader is domain name
        video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
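The only functional change to GenericIE is the flags=re.DOTALL on the title regex: without it, '.' stops at newlines, so a <title> element that wraps across lines is missed. A minimal illustration (the page fragment is made up):

import re

webpage = '<title>Some video\n| Site Name</title>'  # hypothetical multi-line title
print(re.search(r'<title>(.*)</title>', webpage))                      # None: '.' does not cross the newline
print(re.search(r'<title>(.*)</title>', webpage, re.DOTALL).group(1))  # 'Some video\n| Site Name'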

View File

@@ -10,8 +10,7 @@ from ..utils import (
class GooglePlusIE(InfoExtractor):
    """Information extractor for plus.google.com."""
    IE_DESC = u'Google Plus'
    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'
    _TEST = {

View File

@@ -8,7 +8,7 @@ from ..utils import (
class GoogleSearchIE(SearchInfoExtractor):
    """Information Extractor for Google Video search queries."""
    IE_DESC = u'Google Video search'
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'

View File

@@ -0,0 +1,42 @@
import re

from .common import InfoExtractor


class InstagramIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/#',
        u'file': u'aye83DjauH.mp4',
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq",
            u"title": u"Video by naomipq"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(
            r'<meta property="og:video" content="(.+?)"',
            webpage, u'video URL')
        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)" />',
            webpage, u'thumbnail URL', fatal=False)
        html_title = self._html_search_regex(
            r'<title>(.+?)</title>',
            webpage, u'title', flags=re.DOTALL)
        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
        uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
            webpage, u'uploader name', fatal=False)
        ext = 'mp4'

        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'uploader_id' : uploader_id
        }]

View File

@@ -16,10 +16,9 @@ from ..utils import (
class StanfordOpenClassroomIE(InfoExtractor):
    """Information extractor for Stanford's Open ClassRoom"""
    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    IE_NAME = u'stanfordoc'
    IE_DESC = u'Stanford Open ClassRoom'
    _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        u'file': u'PracticalUnix_intro-environment.mp4',

View File

@@ -0,0 +1,49 @@
import re

from .common import InfoExtractor


class TrailerAddictIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)'
    _TEST = {
        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        u'file': u'76184.mp4',
        u'md5': u'41365557f3c8c397d091da510e73ceb4',
        u'info_dict': {
            u"title": u"Prince Avalanche Trailer",
            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<title>(.+?)</title>',
                webpage, 'video title').replace(' - Trailer Addict','')
        view_count = self._search_regex(r'Views: (.+?)<br />',
                webpage, 'Views Count')
        description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
                webpage, 'video description')
        video_id = self._search_regex(r'<meta property="og:video" content="(.+?)" />',
                webpage, 'Video id').split('=')[1]
        info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id))
        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")

        final_url = self._search_regex(r'&fileurl=(.+)',
                info_webpage, 'Download url').replace('%3F','?')
        thumbnail_url = self._search_regex(r'&image=(.+?)&',
                info_webpage, 'thumbnail url')
        ext = final_url.split('.')[-1].split('?')[0]

        return [{
            'id' : video_id,
            'url' : final_url,
            'ext' : ext,
            'title' : title,
            'thumbnail' : thumbnail_url,
            'description' : description,
            'view_count' : view_count,
        }]

View File

@@ -11,7 +11,7 @@ from ..utils import (
)

class YahooIE(InfoExtractor):
    """Information extractor for screen.yahoo.com."""
    IE_DESC = u'Yahoo screen'
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
    _TEST = {
        u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -88,8 +88,7 @@ class YahooIE(InfoExtractor):
        return info_dict

class YahooSearchIE(SearchInfoExtractor):
    """Information Extractor for Yahoo! Video search queries."""
    IE_DESC = u'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

View File

@@ -23,8 +23,7 @@ from ..utils import (
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                   # http(s):// (optional)
@@ -34,7 +33,7 @@ class YoutubeIE(InfoExtractor):
                         (?:                              # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)           # v/ or embed/ or e/
                             |(?:                         # or the v= param in all its forms
                                 (?:watch(?:_popup)?(?:\.php)?)?       # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)              # the params delimiter ? or # or #!
                                 (?:.*?&)?                # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
@@ -402,6 +401,9 @@ class YoutubeIE(InfoExtractor):
        return video_id

    def _real_extract(self, url):
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
@@ -583,7 +585,7 @@ class YoutubeIE(InfoExtractor):
            if req_format is None or req_format == 'best':
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == 'worst':
                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
            elif req_format in ('-1', 'all'):
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            else:
@@ -626,8 +628,7 @@ class YoutubeIE(InfoExtractor):
        return results

class YoutubePlaylistIE(InfoExtractor):
    """Information Extractor for YouTube playlists."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:https?://)?
                        (?:\w+\.)?
@@ -694,8 +695,7 @@ class YoutubePlaylistIE(InfoExtractor):
class YoutubeChannelIE(InfoExtractor):
    """Information Extractor for YouTube channels."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
@@ -753,8 +753,7 @@ class YoutubeChannelIE(InfoExtractor):
class YoutubeUserIE(InfoExtractor):
    """Information Extractor for YouTube users."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
@@ -810,7 +809,7 @@ class YoutubeUserIE(InfoExtractor):
        return [self.playlist_result(url_results, playlist_title = username)]

class YoutubeSearchIE(SearchInfoExtractor):
    """Information Extractor for YouTube search queries."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
@@ -850,3 +849,18 @@ class YoutubeSearchIE(SearchInfoExtractor):
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)


class YoutubeShowIE(InfoExtractor):
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
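The new YoutubeShowIE simply collects the per-season playlist links from the show page and hands each one off as a YoutubePlaylist result. The season discovery reduces to the regex below, shown here on a hypothetical HTML fragment:

import re

webpage = ('<a href="/playlist?list=PL0001">Season 1</a>'
           '<a href="/playlist?list=PL0002">Season 2</a>')  # hypothetical show page fragment
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
print(len(m_seasons))  # 2
print(['https://www.youtube.com' + m.group(1) for m in m_seasons])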

View File

@@ -44,7 +44,7 @@ def update_self(to_screen, verbose, filename):
    if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
        to_screen(u'It looks like you installed youtube-dl with pip, setup.py or a tarball. Please use that to update.')
        to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
        return

    # Check if there is a new version