Merge branch 'master' into use-other-downloaders

This commit is contained in:
Rogério Brito 2013-07-04 09:57:02 -03:00
commit a15f3f40ed
10 changed files with 121 additions and 59 deletions

View File

@ -18,19 +18,13 @@ which means you can modify it, redistribute it or use it however you like.
--version print program version and exit --version print program version and exit
-U, --update update this program to latest version -U, --update update this program to latest version
-i, --ignore-errors continue on download errors -i, --ignore-errors continue on download errors
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10)
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k)
(default is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By
default, the buffer size is automatically resized
from an initial value of SIZE.
--dump-user-agent display the current browser identification --dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent --user-agent UA specify a custom user agent
--referer REF specify a custom referer, use if the video access --referer REF specify a custom referer, use if the video access
is restricted to one domain is restricted to one domain
--list-extractors List all supported extractors and the URLs they --list-extractors List all supported extractors and the URLs they
would handle would handle
--extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy --proxy URL Use the specified HTTP/HTTPS proxy
--no-check-certificate Suppress HTTPS certificate validation. --no-check-certificate Suppress HTTPS certificate validation.
@ -50,6 +44,15 @@ which means you can modify it, redistribute it or use it however you like.
--datebefore DATE download only videos uploaded before this date --datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date --dateafter DATE download only videos uploaded after this date
## Download Options:
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10)
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k)
(default is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By
default, the buffer size is automatically resized
from an initial value of SIZE.
## Filesystem Options: ## Filesystem Options:
-t, --title use title in file name (default) -t, --title use title in file name (default)
--id use only video ID in file name --id use only video ID in file name
@ -194,11 +197,11 @@ Examples:
### Can you please put the -b option back? ### Can you please put the -b option back?
Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it.
### I get HTTP error 402 when trying to download a video. What's this? ### I get HTTP error 402 when trying to download a video. What's this?
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
### I have downloaded a video but how can I play it? ### I have downloaded a video but how can I play it?

View File

@ -12,8 +12,9 @@ except ImportError:
from distutils.core import setup from distutils.core import setup
try: try:
# This will create an exe that needs Microsoft Visual C++ 2008
# Redistributable Package
import py2exe import py2exe
"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""
except ImportError: except ImportError:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
print("Cannot import py2exe", file=sys.stderr) print("Cannot import py2exe", file=sys.stderr)
@ -26,13 +27,15 @@ py2exe_options = {
"dist_dir": '.', "dist_dir": '.',
"dll_excludes": ['w9xpopen.exe'], "dll_excludes": ['w9xpopen.exe'],
} }
py2exe_console = [{ py2exe_console = [{
"script": "./youtube_dl/__main__.py", "script": "./youtube_dl/__main__.py",
"dest_base": "youtube-dl", "dest_base": "youtube-dl",
}] }]
py2exe_params = { py2exe_params = {
'console': py2exe_console, 'console': py2exe_console,
'options': { "py2exe": py2exe_options }, 'options': {"py2exe": py2exe_options},
'zipfile': None 'zipfile': None
} }
@ -41,30 +44,34 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
else: else:
params = { params = {
'scripts': ['bin/youtube-dl'], 'scripts': ['bin/youtube-dl'],
'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo... 'data_files': [ # Installing system-wide would require sudo...
('share/doc/youtube_dl', ['README.txt']), ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/man/man1/', ['youtube-dl.1'])] ('share/doc/youtube_dl', ['README.txt']),
('share/man/man1/', ['youtube-dl.1'])
]
} }
# Get the version from youtube_dl/version.py without importing the package # Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
setup( setup(
name = 'youtube_dl', name='youtube_dl',
version = __version__, version=__version__,
description = 'YouTube video downloader', description='YouTube video downloader',
long_description = 'Small command-line program to download videos from YouTube.com and other video sites.', long_description='Small command-line program to download videos from'
url = 'https://github.com/rg3/youtube-dl', ' YouTube.com and other video sites.',
author = 'Ricardo Garcia', url='https://github.com/rg3/youtube-dl',
maintainer = 'Philipp Hagemeister', author='Ricardo Garcia',
maintainer_email = 'phihag@phihag.de', maintainer='Philipp Hagemeister',
packages = ['youtube_dl', 'youtube_dl.extractor'], maintainer_email='phihag@phihag.de',
packages=['youtube_dl', 'youtube_dl.extractor'],
# Provokes warning on most systems (why?!) # Provokes warning on most systems (why?!)
#test_suite = 'nose.collector', # test_suite = 'nose.collector',
#test_requires = ['nosetest'], # test_requires = ['nosetest'],
classifiers = [ classifiers=[
"Topic :: Multimedia :: Video", "Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable", "Development Status :: 5 - Production/Stable",
"Environment :: Console", "Environment :: Console",

View File

@ -586,14 +586,16 @@ def _real_main(argv=None):
if opts.verbose: if opts.verbose:
ydl.to_screen(u'[debug] youtube-dl version ' + __version__) ydl.to_screen(u'[debug] youtube-dl version ' + __version__)
try: try:
sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, sp = subprocess.Popen(
cwd=os.path.dirname(os.path.abspath(__file__))) ['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate() out, err = sp.communicate()
out = out.decode().strip() out = out.decode().strip()
if re.match('[0-9a-f]+', out): if re.match('[0-9a-f]+', out):
ydl.to_screen(u'[debug] Git HEAD: ' + out) ydl.to_screen(u'[debug] Git HEAD: ' + out)
except: except:
pass sys.exc_clear()
ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

View File

@ -16,7 +16,7 @@ class ArteTvIE(InfoExtractor):
www.arte.tv/guide, the extraction process is different for each one. www.arte.tv/guide, the extraction process is different for each one.
The videos expire in 7 days, so we can't add tests. The videos expire in 7 days, so we can't add tests.
""" """
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html' _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html'
_LIVE_URL = r'index-[0-9]+\.html$' _LIVE_URL = r'index-[0-9]+\.html$'
@ -57,10 +57,11 @@ class ArteTvIE(InfoExtractor):
mobj = re.match(self._EMISSION_URL, url) mobj = re.match(self._EMISSION_URL, url)
if mobj is not None: if mobj is not None:
name = mobj.group('name') name = mobj.group('name')
lang = mobj.group('lang')
# This is not a real id, it can be for example AJT for the news # This is not a real id, it can be for example AJT for the news
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
video_id = mobj.group('id') video_id = mobj.group('id')
return self._extract_emission(url, video_id) return self._extract_emission(url, video_id, lang)
mobj = re.match(self._VIDEOS_URL, url) mobj = re.match(self._VIDEOS_URL, url)
if mobj is not None: if mobj is not None:
@ -72,10 +73,9 @@ class ArteTvIE(InfoExtractor):
# self.extractLiveStream(url) # self.extractLiveStream(url)
# return # return
def _extract_emission(self, url, video_id): def _extract_emission(self, url, video_id, lang):
"""Extract from www.arte.tv/guide""" """Extract from www.arte.tv/guide"""
webpage = self._download_webpage(url, video_id) json_url = 'http://org-www.arte.tv/papi/tvguide/videos/stream/player/F/%s_PLUS7-F/ALL/ALL.json' % video_id
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
json_info = self._download_webpage(json_url, video_id, 'Downloading info json') json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
self.report_extraction(video_id) self.report_extraction(video_id)
@ -91,6 +91,16 @@ class ArteTvIE(InfoExtractor):
} }
formats = player_info['VSR'].values() formats = player_info['VSR'].values()
def _match_lang(f):
    """Return True if format ``f`` is in the language of the requested URL.

    ``lang`` is read from the enclosing scope and ``f['versionCode']`` is
    the only key of ``f`` that is inspected.
    """
    # Letter used in the version codes for each URL language.
    lang_letters = {'fr': 'F', 'de': 'A'}
    letter = lang_letters.get(lang)
    if letter is None:
        # Unknown language: keep the format rather than failing with an
        # unbound local; the caller then simply filters nothing out.
        return True
    # re.match only anchors at the start, so these act as prefix tests:
    # V<letter>, VO<letter> or V<letter>-ST<any character>.
    patterns = [r'VO?%s' % letter, r'V%s-ST.' % letter]
    return any(re.match(p, f['versionCode']) for p in patterns)
# Some formats may not be in the same language as the url
formats = filter(_match_lang, formats)
# We order the formats by quality # We order the formats by quality
formats = sorted(formats, key=lambda f: int(f['height'])) formats = sorted(formats, key=lambda f: int(f['height']))
# Pick the best quality # Pick the best quality

View File

@ -8,6 +8,14 @@ from ..utils import (
) )
class AUEngineIE(InfoExtractor): class AUEngineIE(InfoExtractor):
_TEST = {
u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
u'file': u'lfvlytY6.mp4',
u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f',
u'info_dict': {
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
}
}
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -27,7 +27,7 @@ class BlipTVIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
u'file': u'5779306.m4v', u'file': u'5779306.m4v',
u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe', u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
u'info_dict': { u'info_dict': {
u"upload_date": u"20111205", u"upload_date": u"20111205",
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
@ -103,7 +103,12 @@ class BlipTVIE(InfoExtractor):
data = json_data data = json_data
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
video_url = data['media']['url'] if 'additionalMedia' in data:
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
best_format = formats[-1]
video_url = best_format['url']
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url) umobj = re.match(self._URL_EXT, video_url)
if umobj is None: if umobj is None:
raise ValueError('Can not determine filename extension') raise ValueError('Can not determine filename extension')

View File

@ -1,24 +1,34 @@
# coding: utf-8
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
class TudouIE(InfoExtractor): class TudouIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)' _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
_TEST = { _TEST = {
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
u'file': u'159447792.f4v', u'file': u'159448201.f4v',
u'md5': u'ad7c358a01541e926a1e413612c6b10a', u'md5': u'140a49ed444bd22f93330985d8475fcb',
u'info_dict': { u'info_dict': {
u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526" u"title": u"卡马乔国足开大脚长传冲吊集锦"
} }
} }
def _url_for_id(self, id, quality=None):
    """Return the video URL reported by the v2.tudou.com info page for ``id``.

    When ``quality`` is truthy it is appended to the request as
    ``&hd<quality>``.
    """
    query = str(id)
    if quality:
        query += '&hd' + quality
    info_url = "http://v2.tudou.com/f?id=" + query
    info_page = self._download_webpage(info_url, id, "Opening the info webpage")
    # The URL is the text immediately preceding the closing </f> tag.
    return self._html_search_regex('>(.+?)</f>', info_page, 'video url')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(2).replace('.html','') video_id = mobj.group(2)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_id = re.search('"k":(.+?),',webpage).group(1)
title = re.search(",kw:\"(.+)\"",webpage) title = re.search(",kw:\"(.+)\"",webpage)
if title is None: if title is None:
title = re.search(",kw: \'(.+)\'",webpage) title = re.search(",kw: \'(.+)\'",webpage)
@ -27,14 +37,27 @@ class TudouIE(InfoExtractor):
if thumbnail_url is None: if thumbnail_url is None:
thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
thumbnail_url = thumbnail_url.group(1) thumbnail_url = thumbnail_url.group(1)
info_url = "http://v2.tudou.com/f?id="+str(video_id)
webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1) segments = json.loads(segs_json)
ext = (final_url.split('?')[0]).split('.')[-1] # It looks like the keys are the arguments that have to be passed as
return [{ # the hd field in the request url, we pick the higher
'id': video_id, quality = sorted(segments.keys())[-1]
'url': final_url, parts = segments[quality]
'ext': ext, result = []
'title': title, len_parts = len(parts)
'thumbnail': thumbnail_url, if len_parts > 1:
}] self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
for part in parts:
part_id = part['k']
final_url = self._url_for_id(part_id, quality)
ext = (final_url.split('?')[0]).split('.')[-1]
part_info = {'id': part_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
}
result.append(part_info)
return result

View File

@ -441,7 +441,7 @@ class YoutubeIE(InfoExtractor):
break break
if 'token' not in video_info: if 'token' not in video_info:
if 'reason' in video_info: if 'reason' in video_info:
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0]) raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
else: else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason') raise ExtractorError(u'"token" parameter not in video info for unknown reason')

View File

@ -470,10 +470,14 @@ def make_HTTPS_handler(opts):
class ExtractorError(Exception): class ExtractorError(Exception):
"""Error during info extraction.""" """Error during info extraction."""
def __init__(self, msg, tb=None): def __init__(self, msg, tb=None, expected=False):
""" tb, if given, is the original traceback (so that it can be printed out). """ """ tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
"""
if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True
if not expected:
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
super(ExtractorError, self).__init__(msg) super(ExtractorError, self).__init__(msg)

View File

@ -1,2 +1,2 @@
__version__ = '2013.06.34.4' __version__ = '2013.07.02'