From 887a227953cfc8acda774064bbe2c22755c5a81d Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Sat, 29 Jun 2013 19:17:27 +0500 Subject: [PATCH 01/12] added an IE and test for traileraddict.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/traileraddict.py | 49 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/traileraddict.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8c55f33dc..ec800d9fb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -47,6 +47,7 @@ from .statigram import StatigramIE from .steam import SteamIE from .teamcoco import TeamcocoIE from .ted import TEDIE +from .traileraddict import TrailerAddictIE from .tudou import TudouIE from .tumblr import TumblrIE from .ustream import UstreamIE diff --git a/youtube_dl/extractor/traileraddict.py b/youtube_dl/extractor/traileraddict.py new file mode 100644 index 000000000..9dd26c163 --- /dev/null +++ b/youtube_dl/extractor/traileraddict.py @@ -0,0 +1,49 @@ +import re + +from .common import InfoExtractor + + +class TrailerAddictIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)' + _TEST = { + u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer', + u'file': u'76184.mp4', + u'md5': u'41365557f3c8c397d091da510e73ceb4', + u'info_dict': { + u"title": u"Prince Avalanche Trailer", + u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind." + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + + title = self._search_regex(r'(.+?)', + webpage, 'video title').replace(' - Trailer Addict','') + view_count = self._search_regex(r'Views: (.+?)
', + webpage, 'Views Count') + description = self._search_regex(r'', + webpage, 'video description') + video_id = self._search_regex(r'', + webpage, 'Video id').split('=')[1] + + info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id)) + info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage") + + final_url = self._search_regex(r'&fileurl=(.+)', + info_webpage, 'Download url').replace('%3F','?') + thumbnail_url = self._search_regex(r'&image=(.+?)&', + info_webpage, 'thumbnail url') + ext = final_url.split('.')[-1].split('?')[0] + + return [{ + 'id' : video_id, + 'url' : final_url, + 'ext' : ext, + 'title' : title, + 'thumbnail' : thumbnail_url, + 'description' : description, + 'view_count' : view_count, + }] From d7f44b5bdba7e4e23401c8ccdf0c238e24c6697d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 1 Jul 2013 02:29:29 +0200 Subject: [PATCH 02/12] [youtube] Warn if URL is most likely wrong (#969) --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 025afb390..109c8a93f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -402,6 +402,9 @@ class YoutubeIE(InfoExtractor): return video_id def _real_extract(self, url): + if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): + self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).') + # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: From f9bd64c09897ebbd3a3278fe21e7ae798c6fc140 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 1 Jul 2013 02:36:49 +0200 Subject: [PATCH 03/12] [update] Add package manager to error message (#959) --- youtube_dl/update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index eab8417a5..ccab6f27f 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -44,7 +44,7 @@ def update_self(to_screen, verbose, filename): if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"): - to_screen(u'It looks like you installed youtube-dl with pip, setup.py or a tarball. Please use that to update.') + to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.') return # Check if there is a new version From 0beb3add18f8da0e558369074100e9105da82b8e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 1 Jul 2013 14:53:25 +0200 Subject: [PATCH 04/12] Separate downloader options --- youtube_dl/__init__.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6334ce3c4..98388a9f3 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -118,6 +118,7 @@ def parseOpts(overrideArguments=None): selection = optparse.OptionGroup(parser, 'Video Selection') authentication = optparse.OptionGroup(parser, 'Authentication Options') video_format = optparse.OptionGroup(parser, 'Video Format Options') + downloader = optparse.OptionGroup(parser, 'Download Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -130,15 +131,6 @@ def parseOpts(overrideArguments=None): action='store_true', dest='update_self', help='update this program to latest version') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - general.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') - general.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) - general.add_option('--buffer-size', - dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") - general.add_option('--no-resize-buffer', - action='store_true', dest='noresizebuffer', - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -152,7 +144,7 @@ def parseOpts(overrideArguments=None): help='List all supported extractors and the URLs they would handle', default=False) general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') - general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + selection.add_option('--playlist-start', dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1) @@ -211,6 +203,17 @@ def parseOpts(overrideArguments=None): action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') + downloader.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') + downloader.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) + downloader.add_option('--buffer-size', + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") + downloader.add_option('--no-resize-buffer', + action='store_true', dest='noresizebuffer', + help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) + downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option('-s', '--simulate', @@ -317,6 +320,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(general) parser.add_option_group(selection) + parser.add_option_group(downloader) parser.add_option_group(filesystem) parser.add_option_group(verbosity) parser.add_option_group(video_format) From bcd6e4bd07fb924e3de37ed46f21c46debdebaa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 1 Jul 2013 16:51:18 +0200 Subject: [PATCH 05/12] YoutubeIE: extract the correct video id for movie URLs (closes #597) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 109c8a93f..656f46f21 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -34,7 +34,7 @@ class YoutubeIE(InfoExtractor): (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ |(?: # or the v= param in all its forms - (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) + (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) v= From d828f3a5500b29f30c702e6aa34add6e29370b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 1 Jul 2013 17:19:33 +0200 Subject: [PATCH 06/12] YoutubeIE: use a negative index when accessing the last element of the format list --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 656f46f21..6782bbff6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -586,7 +586,7 @@ class YoutubeIE(InfoExtractor): if req_format is None or req_format == 'best': video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality elif req_format == 'worst': - video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality + video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality elif req_format in ('-1', 'all'): video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats else: From 75dff0eef753f7bbef2947449441f4f6b2e13547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 1 Jul 2013 17:59:28 +0200 Subject: [PATCH 07/12] [youtube]: add YoutubeShowIE (closes #14) It just extracts the playlists urls for each season --- test/test_youtube_lists.py | 8 +++++++- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/youtube.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 4486b7eb0..dd9e292b0 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE +from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE from youtube_dl.utils import * from helper import FakeYDL @@ -88,5 +88,11 @@ class TestYoutubeLists(unittest.TestCase): result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] self.assertEqual(len(result['entries']), 2) + def test_youtube_show(self): + dl = FakeYDL() + ie = YoutubeShowIE(dl) + result = ie.extract('http://www.youtube.com/show/airdisasters') + self.assertTrue(len(result) >= 4) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ba0e86713..ac2e5f0e7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -67,7 +67,7 @@ from .yahoo import YahooIE, YahooSearchIE from .youjizz import YouJizzIE from .youku import YoukuIE from .youporn import YouPornIE -from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE +from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE from .zdf import ZDFIE diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6782bbff6..76b297ea5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -853,3 +853,17 @@ class YoutubeSearchIE(SearchInfoExtractor): video_ids = video_ids[:n] videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] return self.playlist_result(videos, query) + + +class YoutubeShowIE(InfoExtractor): + _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' + IE_NAME = u'youtube:show' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_name = mobj.group(1) + webpage = self._download_webpage(url, show_name, u'Downloading show webpage') + # There's one playlist for each season of the show + m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) + self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons))) + return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] From 7dba9cd0390f3cc89b1ff12ea1a4db6e8ddfd74b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 1 Jul 2013 18:21:29 +0200 Subject: [PATCH 08/12] Sort IEs alphabetically in --list-extractors --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 98388a9f3..c4b09695c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -420,7 +420,7 @@ def _real_main(argv=None): extractors = gen_extractors() if opts.list_extractors: - for ie in extractors: + for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) matchedUrls = [url for url in all_urls if ie.suitable(url)] all_urls = [url for url in all_urls if url not in matchedUrls] From 0f818663291752b2084d81aff76d945a43c66a06 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 1 Jul 2013 18:52:19 +0200 Subject: [PATCH 09/12] Add --list-extractor-descriptions (human-readable list of IEs) --- youtube_dl/__init__.py | 16 ++++++++++++++++ youtube_dl/extractor/comedycentral.py | 3 +-- youtube_dl/extractor/common.py | 4 ++++ youtube_dl/extractor/generic.py | 3 +-- youtube_dl/extractor/googleplus.py | 3 +-- youtube_dl/extractor/googlesearch.py | 2 +- youtube_dl/extractor/stanfordoc.py | 5 ++--- youtube_dl/extractor/yahoo.py | 5 ++--- youtube_dl/extractor/youtube.py | 15 ++++++--------- 9 files changed, 34 insertions(+), 22 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c4b09695c..4f1829214 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -35,6 +35,7 @@ import codecs import getpass import optparse import os +import random import re import shlex import socket @@ -142,6 +143,9 @@ def parseOpts(overrideArguments=None): general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) + general.add_option('--list-extractor-descriptions', + action='store_true', dest='list_extractor_descriptions', + help='Output descriptions of all supported extractors', default=False) general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') @@ -427,6 +431,18 @@ def _real_main(argv=None): for mu in matchedUrls: compat_print(u' ' + mu) sys.exit(0) + if opts.list_extractor_descriptions: + for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): + if not ie._WORKING: + continue + desc = getattr(ie, 'IE_DESC', ie.IE_NAME) + if hasattr(ie, 'SEARCH_KEY'): + _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') + _COUNTS = (u'', u'5', u'10', u'all') + desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) + compat_print(desc) + sys.exit(0) + # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index d9337f8a1..93d9e3d5e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,8 +12,7 @@ from ..utils import ( class ComedyCentralIE(InfoExtractor): - """Information extractor for The Daily Show and Colbert Report """ - + IE_DESC = u'The Daily Show / Colbert Report' # urls can be abbreviations like :thedailyshow or :colbert # urls for episodes like: # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5c6fd7945..655836ff6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -263,3 +263,7 @@ class SearchInfoExtractor(InfoExtractor): def _get_n_results(self, query, n): """Get a specified number of results for a query""" raise NotImplementedError("This method must be implemented by sublclasses") + + @property + def SEARCH_KEY(self): + return self._SEARCH_KEY diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 019bbe6e9..6fb9e6f32 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,8 +11,7 @@ from ..utils import ( ) class GenericIE(InfoExtractor): - """Generic last-resort information extractor.""" - + IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' IE_NAME = u'generic' _TEST = { diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index a8f171afd..9f7fc19a4 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -10,8 +10,7 @@ from ..utils import ( class GooglePlusIE(InfoExtractor): - """Information extractor for plus.google.com.""" - + IE_DESC = u'Google Plus' _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' IE_NAME = u'plus.google' _TEST = { diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 21c240e51..f9c88e9b5 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -8,7 +8,7 @@ from ..utils import ( class GoogleSearchIE(SearchInfoExtractor): - """Information Extractor for Google Video search queries.""" + IE_DESC = u'Google Video search' _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"' _MAX_RESULTS = 1000 IE_NAME = u'video.google:search' diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index 25a0d09f7..b27838bf9 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -16,10 +16,9 @@ from ..utils import ( class StanfordOpenClassroomIE(InfoExtractor): - """Information extractor for Stanford's Open ClassRoom""" - - _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P