class YDL(FakeYDL):
    """FakeYDL variant that records results instead of downloading.

    ``process_info`` appends every info dict it receives to
    ``downloaded_info_dicts`` and ``to_screen`` appends every message to
    ``msgs`` so that the tests can inspect them afterwards.
    """

    def __init__(self, *args, **kwargs):
        super(YDL, self).__init__(*args, **kwargs)
        self.downloaded_info_dicts = []
        self.msgs = []

    def process_info(self, info_dict):
        self.downloaded_info_dicts.append(info_dict)

    def to_screen(self, msg):
        self.msgs.append(msg)


class TestFormatSelection(unittest.TestCase):
    """Tests for the format selection triggered by process_ie_result()."""

    def test_prefer_free_formats(self):
        # Every sub-case builds its own info dict: the dict handed to
        # process_ie_result() is updated in place with the selected format,
        # so sharing one dict would leak state between sub-cases.

        # Same resolution => download webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {u'ext': u'webm', u'height': 460},
            {u'ext': u'mp4', u'height': 460},
        ]
        info_dict = {u'formats': formats, u'extractor': u'test'}
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded[u'ext'], u'webm')

        # Different resolution => download best quality (mp4)
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {u'ext': u'webm', u'height': 720},
            {u'ext': u'mp4', u'height': 1080},
        ]
        info_dict = {u'formats': formats, u'extractor': u'test'}
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded[u'ext'], u'mp4')

        # No prefer_free_formats => keep original formats order
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
            {u'ext': u'webm', u'height': 720},
            {u'ext': u'flv', u'height': 720},
        ]
        info_dict = {u'formats': formats, u'extractor': u'test'}
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded[u'ext'], u'flv')

    def test_format_limit(self):
        format_ids = [u'meh', u'good', u'great', u'excellent']

        def fresh_info_dict():
            # Fresh dicts per call so earlier selections cannot influence
            # later ones through in-place updates.
            return {
                u'formats': [{u'format_id': fid} for fid in format_ids],
                u'extractor': u'test',
                'id': 'testvid',
            }

        # No limit => the last (best) format is picked
        ydl = YDL()
        ydl.process_ie_result(fresh_info_dict())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded[u'format_id'], u'excellent')

        # Limit passed through the constructor override
        ydl = YDL({'format_limit': 'good'})
        # assertEqual instead of a bare assert: it is not stripped under -O
        # and reports both values on failure.
        self.assertEqual(ydl.params['format_limit'], 'good')
        ydl.process_ie_result(fresh_info_dict())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded[u'format_id'], u'good')

        # Limit combined with "all" => everything up to the limit, in order
        ydl = YDL({'format_limit': 'great', 'format': 'all'})
        ydl.process_ie_result(fresh_info_dict())
        self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
        self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
        self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
        # The announcement message mentions the number of formats
        self.assertTrue('3' in ydl.msgs[0])

        # Limit equal to the best format => nothing is cut off
        ydl = YDL()
        ydl.params['format_limit'] = 'excellent'
        ydl.process_ie_result(fresh_info_dict())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded[u'format_id'], u'excellent')
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video result and process them.

    info_dict is updated in place: 'playlist' and 'playlist_index' are set
    to None when 'playlist' is missing, every format gets a 'format' and a
    'format_id' field, and finally the fields of the last selected format
    are merged into info_dict (backwards compatibility).

    The candidates are info_dict['formats'], or info_dict itself when that
    entry is missing or None.  They are narrowed by the 'format_limit'
    param, optionally re-sorted by the 'prefer_free_formats' param and then
    chosen according to the 'format' param ('best', 'worst', 'all'/'-1', or
    a '/'-separated list of format ids in order of preference).

    When download is true, self.process_info() is called once per selected
    format with a copy of info_dict merged with that format.

    Returns info_dict, or None when the 'listformats' param is set (the
    formats are only listed through self.list_formats() in that case).
    Raises ExtractorError if no format matches the request.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
        # Honour download=False here too, like the generic path below
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format') is None:
            if fmt.get('height') is not None:
                if fmt.get('width') is not None:
                    format_desc = u'%sx%s' % (fmt['width'], fmt['height'])
                else:
                    format_desc = u'%sp' % fmt['height']
            else:
                format_desc = '???'
            fmt['format'] = format_desc
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # Keep everything up to and including the format named by the limit
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f.get('ext'))
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first; -1 keeps the key comparable on
            # Python 3, where None cannot be ordered against an int.
            height = f.get('height')
            if height is None:
                height = -1
            width = f.get('width')
            if width is None:
                width = -1
            # We only compare the extension if they have the same height and width
            return (height, width, ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format == 'best' or req_format is None:
        # Slicing yields [] for an empty list, so the check below reports
        # the problem instead of an IndexError
        formats_to_download = formats[-1:]
    elif req_format == 'worst':
        formats_to_download = formats[:1]
    # The -1 is for supporting YoutubeIE
    elif req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/10/5, we pick
        # the first that is available, starting from left
        formats_to_download = []
        for rf in req_format.split('/'):
            # next() on a generator works on Python 2 and 3; filter()
            # returns an always-truthy, non-indexable iterator on Python 3
            match = next((f for f in formats if f['format_id'] == rf), None)
            if match is not None:
                formats_to_download = [match]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available')

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
def list_formats(self, info_dict):
    """Print the formats available for a video through self.to_screen().

    One line is produced per format, showing its 'format_id', 'ext' and
    'format' ('???' when 'format' is missing).  The formats come from
    info_dict['formats'], or from info_dict itself when that entry is
    missing or None.  When more than one format is listed, the first line
    is tagged ' (worst)' and the last one ' (best)'.
    """
    formats = info_dict.get('formats')
    if formats is None:
        # Single-format result: the info dict describes the format itself
        formats = [info_dict]

    rows = []
    for fmt in formats:
        rows.append("%s\t:\t%s\t[%s]" % (
            fmt['format_id'],
            fmt['ext'],
            fmt.get('format', '???'),
        ))
    # Only tag the extremes when there is something to compare; this also
    # avoids indexing into an empty list.
    if len(rows) > 1:
        rows[0] += ' (worst)'
        rows[-1] += ' (best)'
    formats_s = "\n".join(rows)
    self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s))
"-f mp4" and "-f flv" are also supported') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2a5a85dc6..d4af3b5eb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -365,7 +365,7 @@ class SearchInfoExtractor(InfoExtractor): def _get_n_results(self, query, n): """Get a specified number of results for a query""" - raise NotImplementedError("This method must be implemented by sublclasses") + raise NotImplementedError("This method must be implemented by subclasses") @property def SEARCH_KEY(self): diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 89805250c..69e0a7bd2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -142,12 +142,19 @@ class GenericIE(InfoExtractor): # Look for embedded Vimeo player mobj = re.search( - r']+?src="(https?://player.vimeo.com/video/.+?)"', webpage) if mobj: player_url = unescapeHTML(mobj.group(1)) surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl, 'Vimeo') + # Look for embedded YouTube player + mobj = re.search( + r']+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage) + if mobj: + surl = unescapeHTML(mobj.group(1)) + return self.url_result(surl, 'Youtube') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 79679a14a..7a3891b89 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -48,7 +48,8 @@ class TudouIE(InfoExtractor): 'ie_key': 'Youku' } - title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title') + title = self._search_regex( + r",kw:\s*['\"](.+?)[\"']", webpage, u'title') thumbnail_url = self._search_regex( 
def takewhile_inclusive(pred, seq):
    """Yield the items of seq up to and including the first one failing pred.

    Behaves like itertools.takewhile, except that the first element e for
    which pred(e) is false is yielded as well before the iteration stops.
    """
    for item in seq:
        # Hand the item out first, then decide whether to go on
        yield item
        if pred(item):
            continue
        break