Merge branch 'master' into use-other-downloaders
This commit is contained in:
commit
ab52469ecf
@ -34,10 +34,10 @@ def try_rm(filename):
|
|||||||
|
|
||||||
|
|
||||||
class FakeYDL(YoutubeDL):
|
class FakeYDL(YoutubeDL):
|
||||||
def __init__(self):
|
def __init__(self, override=None):
|
||||||
# Different instances of the downloader can't share the same dictionary
|
# Different instances of the downloader can't share the same dictionary
|
||||||
# some test set the "sublang" parameter, which would break the md5 checks.
|
# some test set the "sublang" parameter, which would break the md5 checks.
|
||||||
params = get_params()
|
params = get_params(override=override)
|
||||||
super(FakeYDL, self).__init__(params)
|
super(FakeYDL, self).__init__(params)
|
||||||
self.result = []
|
self.result = []
|
||||||
|
|
||||||
|
99
test/test_YoutubeDL.py
Normal file
99
test/test_YoutubeDL.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import FakeYDL
|
||||||
|
|
||||||
|
|
||||||
|
class YDL(FakeYDL):
    """FakeYDL variant that records what it is asked to process or print."""

    def __init__(self, *args, **kwargs):
        super(YDL, self).__init__(*args, **kwargs)
        # Messages passed to to_screen(), in call order
        self.msgs = []
        # Info dicts passed to process_info(), in call order
        self.downloaded_info_dicts = []

    def process_info(self, info_dict):
        """Remember info_dict; nothing else is done with it."""
        self.downloaded_info_dicts.append(info_dict)

    def to_screen(self, msg):
        """Remember msg; nothing else is done with it."""
        self.msgs.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatSelection(unittest.TestCase):
    """Checks of the format selection triggered by process_ie_result."""

    def _process(self, ydl, formats):
        """Feed formats to ydl as a fresh 'test' video.

        A new info dict and copies of the format dicts are built on every
        call, so that fields written during processing cannot leak from
        one scenario into the next.  Returns ydl.downloaded_info_dicts.
        """
        info_dict = {
            u'formats': [dict(f) for f in formats],
            u'extractor': u'test',
            'id': 'testvid',
        }
        ydl.process_ie_result(info_dict)
        return ydl.downloaded_info_dicts

    def test_prefer_free_formats(self):
        # Same resolution => download webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {u'ext': u'webm', u'height': 460},
            {u'ext': u'mp4', u'height': 460},
        ]
        downloaded = self._process(ydl, formats)[0]
        self.assertEqual(downloaded[u'ext'], u'webm')

        # Different resolution => download best quality (mp4)
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {u'ext': u'webm', u'height': 720},
            {u'ext': u'mp4', u'height': 1080},
        ]
        downloaded = self._process(ydl, formats)[0]
        self.assertEqual(downloaded[u'ext'], u'mp4')

        # No prefer_free_formats => keep original formats order
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
            {u'ext': u'webm', u'height': 720},
            {u'ext': u'flv', u'height': 720},
        ]
        downloaded = self._process(ydl, formats)[0]
        self.assertEqual(downloaded[u'ext'], u'flv')

    def test_format_limit(self):
        formats = [
            {u'format_id': u'meh'},
            {u'format_id': u'good'},
            {u'format_id': u'great'},
            {u'format_id': u'excellent'},
        ]

        # No limit => the last (best) format is picked
        ydl = YDL()
        downloaded = self._process(ydl, formats)[0]
        self.assertEqual(downloaded[u'format_id'], u'excellent')

        # Limit given through the constructor override
        ydl = YDL({'format_limit': 'good'})
        # assertEqual rather than a bare assert, which "python -O" strips
        self.assertEqual(ydl.params['format_limit'], 'good')
        downloaded = self._process(ydl, formats)[0]
        self.assertEqual(downloaded[u'format_id'], u'good')

        # 'all' downloads every format up to and including the limit
        ydl = YDL({'format_limit': 'great', 'format': 'all'})
        downloaded = self._process(ydl, formats)
        self.assertEqual(downloaded[0][u'format_id'], u'meh')
        self.assertEqual(downloaded[1][u'format_id'], u'good')
        self.assertEqual(downloaded[2][u'format_id'], u'great')
        # The number of formats is announced on screen
        self.assertTrue('3' in ydl.msgs[0])

        # Limit equal to the best format => nothing is cut off
        ydl = YDL()
        ydl.params['format_limit'] = 'excellent'
        downloaded = self._process(ydl, formats)[0]
        self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Direct execution: run every test case defined in this module
    unittest.main()
|
@ -390,13 +390,7 @@ class YoutubeDL(object):
|
|||||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
ie_result.update(extra_info)
|
ie_result.update(extra_info)
|
||||||
if 'playlist' not in ie_result:
|
return self.process_video_result(ie_result)
|
||||||
# It isn't part of a playlist
|
|
||||||
ie_result['playlist'] = None
|
|
||||||
ie_result['playlist_index'] = None
|
|
||||||
if download:
|
|
||||||
self.process_info(ie_result)
|
|
||||||
return ie_result
|
|
||||||
elif result_type == 'url':
|
elif result_type == 'url':
|
||||||
# We have to add extra_info to the results because it may be
|
# We have to add extra_info to the results because it may be
|
||||||
# contained in a playlist
|
# contained in a playlist
|
||||||
@ -454,6 +448,91 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
def process_video_result(self, info_dict, download=True):
    """Pick the format(s) to download for a single video result.

    info_dict is modified in place: missing 'playlist' and
    'playlist_index' keys are set to None, every candidate format gets
    'format' and 'format_id' fields, and the fields of the last selected
    format are finally merged into it (backwards compatibility).

    The selection is driven by these entries of self.params:
    'listformats', 'format_limit', 'prefer_free_formats' and 'format'
    ('best', 'worst', 'all'/'-1' or a '/'-separated preference list of
    format ids).

    When download is true, self.process_info is called once per selected
    format with a copy of info_dict updated with that format.

    Returns info_dict, or None when 'listformats' is set (the formats are
    only listed in that case).
    Raises ExtractorError if no format satisfies the request.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
        # Honour the download flag here as well
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            if fmt.get('height') is None:
                format_desc = '???'
            elif fmt.get('width') is None:
                format_desc = u'%sp' % fmt['height']
            else:
                format_desc = u'%sx%s' % (fmt['width'], fmt['height'])
            fmt['format'] = format_desc
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # Keep everything up to and including the limit format
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # A missing dimension sorts first; None cannot be ordered
            # against an int on Python 3, so map it to -1
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (
                -1 if height is None else height,
                -1 if width is None else width,
                ext_ord,
            )
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    formats_to_download = []
    # Slices instead of indexing: an empty formats list then ends up in
    # the ExtractorError below rather than in a bare IndexError
    if req_format == 'best' or req_format is None:
        formats_to_download = formats[-1:]
    elif req_format == 'worst':
        formats_to_download = formats[:1]
    # The -1 is for supporting YoutubeIE
    elif req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/10/5, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            # A real list: filter() returns an always-true iterator on
            # Python 3 that can be neither tested nor indexed
            matches = [f for f in formats if f['format_id'] == rf]
            if matches:
                formats_to_download = [matches[0]]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available')

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
|
||||||
@ -672,3 +751,17 @@ class YoutubeDL(object):
|
|||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
||||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||||
archive_file.write(vid_id + u'\n')
|
archive_file.write(vid_id + u'\n')
|
||||||
|
|
||||||
|
def list_formats(self, info_dict):
    """Send a table of the formats available for info_dict to to_screen.

    Each format is shown as "format_id<TAB>:<TAB>ext<TAB>[format]".  When
    'formats' is missing or None, info_dict itself is treated as the only
    format.  With more than one format, the first line is tagged
    ' (worst)' and the last one ' (best)'.
    """
    formats = info_dict.get('formats')
    if formats is None:
        # Single-format result: the info dict doubles as the format
        formats = [info_dict]

    lines = [
        "%s\t:\t%s\t[%s]" % (fmt['format_id'], fmt['ext'], fmt.get('format', '???'))
        for fmt in formats
    ]
    # Only tag when there is something to compare; this also keeps an
    # empty formats list from raising IndexError
    if len(lines) > 1:
        lines[0] += ' (worst)'
        lines[-1] += ' (best)'

    formats_s = "\n".join(lines)
    self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s))
|
||||||
|
@ -196,7 +196,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--cache-dir', dest='cachedir', default=get_cachedir(),
|
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||||
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
|
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||||
@ -235,7 +235,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
|
|
||||||
video_format.add_option('-f', '--format',
|
video_format.add_option('-f', '--format',
|
||||||
action='store', dest='format', metavar='FORMAT',
|
action='store', dest='format', metavar='FORMAT', default='best',
|
||||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||||
video_format.add_option('--all-formats',
|
video_format.add_option('--all-formats',
|
||||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||||
|
@ -365,7 +365,7 @@ class SearchInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
raise NotImplementedError("This method must be implemented by sublclasses")
|
raise NotImplementedError("This method must be implemented by subclasses")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def SEARCH_KEY(self):
|
def SEARCH_KEY(self):
|
||||||
|
@ -142,12 +142,19 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded Vimeo player
|
# Look for embedded Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe\s+src="(https?://player.vimeo.com/video/.*?)"', webpage)
|
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
player_url = unescapeHTML(mobj.group(1))
|
player_url = unescapeHTML(mobj.group(1))
|
||||||
surl = smuggle_url(player_url, {'Referer': url})
|
surl = smuggle_url(player_url, {'Referer': url})
|
||||||
return self.url_result(surl, 'Vimeo')
|
return self.url_result(surl, 'Vimeo')
|
||||||
|
|
||||||
|
# Look for embedded YouTube player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
|
||||||
|
if mobj:
|
||||||
|
surl = unescapeHTML(mobj.group(1))
|
||||||
|
return self.url_result(surl, 'Youtube')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
@ -48,7 +48,8 @@ class TudouIE(InfoExtractor):
|
|||||||
'ie_key': 'Youku'
|
'ie_key': 'Youku'
|
||||||
}
|
}
|
||||||
|
|
||||||
title = self._search_regex(r",kw:['\"](.+?)[\"']", webpage, u'title')
|
title = self._search_regex(
|
||||||
|
r",kw:\s*['\"](.+?)[\"']", webpage, u'title')
|
||||||
thumbnail_url = self._search_regex(
|
thumbnail_url = self._search_regex(
|
||||||
r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
|
r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
|
||||||
|
|
||||||
|
@ -947,6 +947,15 @@ def shell_quote(args):
|
|||||||
return ' '.join(map(pipes.quote, args))
|
return ' '.join(map(pipes.quote, args))
|
||||||
|
|
||||||
|
|
||||||
|
def takewhile_inclusive(pred, seq):
    """Like itertools.takewhile, but also yield the element that stops it.

    Elements of seq are yielded in order up to and including the first
    element e for which pred(e) is false; nothing after it is consumed.
    If pred holds for every element, all of seq is yielded.
    """
    for e in seq:
        yield e
        if not pred(e):
            return
|
||||||
|
|
||||||
|
|
||||||
def smuggle_url(url, data):
|
def smuggle_url(url, data):
|
||||||
""" Pass additional data in a URL for internal use. """
|
""" Pass additional data in a URL for internal use. """
|
||||||
|
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.10.17'
|
__version__ = '2013.10.18.1'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user