diff --git a/.gitignore b/.gitignore
index 24fdb3626..7dd0ad09b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ updates_key.pem
*.mp4
*.part
test/testdata
+.tox
diff --git a/Makefile b/Makefile
index 85dacfa4c..c6d09932b 100644
--- a/Makefile
+++ b/Makefile
@@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
ifeq ($(PREFIX),/usr)
- SYSCONFDIR=/etc
+ SYSCONFDIR=/etc
else
- ifeq ($(PREFIX),/usr/local)
- SYSCONFDIR=/etc
- else
- SYSCONFDIR=$(PREFIX)/etc
- endif
+ ifeq ($(PREFIX),/usr/local)
+ SYSCONFDIR=/etc
+ else
+ SYSCONFDIR=$(PREFIX)/etc
+ endif
endif
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
@@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '*~' \
--exclude '__pycache' \
--exclude '.git' \
+ --exclude 'testdata' \
-- \
bin devscripts test youtube_dl \
CHANGELOG LICENSE README.md README.txt \
diff --git a/README.md b/README.md
index fc8070c37..a2b296613 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@ which means you can modify it, redistribute it or use it however you like.
sudo if needed)
-i, --ignore-errors continue on download errors, for example to to
skip unavailable videos in a playlist
+ --abort-on-error Abort downloading of further videos (in the
+ playlist or the command line) if an error occurs
--dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent
--referer REF specify a custom referer, use if the video access
@@ -30,9 +32,10 @@ which means you can modify it, redistribute it or use it however you like.
--extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy
--no-check-certificate Suppress HTTPS certificate validation.
- --cache-dir None Location in the filesystem where youtube-dl can
- store downloaded information permanently.
- ~/.youtube-dl/cache by default
+ --cache-dir DIR Location in the filesystem where youtube-dl can
+ store downloaded information permanently. By
+ default $XDG_CACHE_HOME/youtube-dl or
+ ~/.cache/youtube-dl.
--no-cache-dir Disable filesystem caching
## Video Selection:
@@ -50,11 +53,16 @@ which means you can modify it, redistribute it or use it however you like.
--date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date
+ --no-playlist download only the currently playing video
+ --age-limit YEARS download only videos suitable for the given age
+ --download-archive FILE Download only videos not present in the archive
+ file. Record all downloaded videos in it.
## Download Options:
- -r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
+ -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
+ 50K or 4.2M)
-R, --retries RETRIES number of retries (default is 10)
- --buffer-size SIZE size of download buffer (e.g. 1024 or 16k)
+ --buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
(default is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By
default, the buffer size is automatically resized
@@ -70,7 +78,10 @@ which means you can modify it, redistribute it or use it however you like.
%(uploader_id)s for the uploader nickname if
different, %(autonumber)s to get an automatically
incremented number, %(ext)s for the filename
- extension, %(upload_date)s for the upload date
+ extension, %(format)s for the format description
+ (like "22 - 1280x720" or "HD"), %(format_id)s for
+ the unique id of the format (like YouTube's
+ itags: "137"), %(upload_date)s for the upload date
(YYYYMMDD), %(extractor)s for the provider
(youtube, metacafe, etc), %(id)s for the video id
, %(playlist)s for the playlist the video is in,
@@ -95,6 +106,7 @@ which means you can modify it, redistribute it or use it however you like.
file modification time
--write-description write video description to a .description file
--write-info-json write video metadata to a .info.json file
+ --write-annotations write video annotations to a .annotations.xml file
--write-thumbnail write thumbnail image to disk
## Verbosity / Simulation Options:
@@ -115,6 +127,8 @@ which means you can modify it, redistribute it or use it however you like.
-v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems(very
verbose)
+ --write-pages Write downloaded pages to files in the current
+ directory
## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of
@@ -161,6 +175,7 @@ which means you can modify it, redistribute it or use it however you like.
processed files are overwritten by default
--embed-subs embed subtitles in the video (only for mp4
videos)
+ --add-metadata add metadata to the files
# CONFIGURATION
diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
index bd10f63c2..ce893fcbe 100644
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -1,4 +1,4 @@
-__youtube-dl()
+__youtube_dl()
{
local cur prev opts
COMPREPLY=()
@@ -15,4 +15,4 @@ __youtube-dl()
fi
}
-complete -F __youtube-dl youtube-dl
+complete -F __youtube_dl youtube-dl
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
new file mode 100644
index 000000000..63401fe18
--- /dev/null
+++ b/devscripts/check-porn.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+"""
+Sanity-check the 'age_limit' tagging of the extractor test cases.
+
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to
+find test cases whose page looks like adult content but whose info_dict is
+not tagged with age_limit 18 (and the other way round).
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_request
+
+for test in get_testcases():
+    try:
+        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+    except Exception:
+        # Best effort: report the failed fetch and move on, but do not
+        # swallow KeyboardInterrupt/SystemExit like a bare except would.
+        print('\nFail: {0}'.format(test['name']))
+        continue
+
+    webpage = webpage.decode('utf8', 'replace')
+
+    # Evaluate both sides of the heuristic only once
+    looks_adult = 'porn' in webpage.lower()
+    tagged_adult = test.get('info_dict', {}).get('age_limit') == 18
+
+    if looks_adult and not tagged_adult:
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif tagged_adult and not looks_adult:
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+    sys.stdout.flush()
+
+# Terminate the progress line (a bare print() would output "()" on Python 2)
+sys.stdout.write('\n')
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index 33f242480..153e15c8a 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -16,10 +16,11 @@ def main():
ie_htmls = []
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
ie_html = '{}'.format(ie.IE_NAME)
- try:
+ ie_desc = getattr(ie, 'IE_DESC', None)
+ if ie_desc is False:
+ continue
+ elif ie_desc is not None:
ie_html += ': {}'.format(ie.IE_DESC)
- except AttributeError:
- pass
if ie.working() == False:
ie_html += ' (Currently broken)'
ie_htmls.append('
{}
'.format(ie_html))
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 796468b4b..2766174c1 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -88,10 +88,6 @@ ROOT=$(pwd)
"$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update
git commit -m "release $version"
- git show HEAD
- read -p "Is it good, can I push? (y/n) " -n 1
- if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
- echo
git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)
diff --git a/setup.py b/setup.py
index 3b6dc2d40..aa7cfca08 100644
--- a/setup.py
+++ b/setup.py
@@ -8,8 +8,10 @@ import sys
try:
from setuptools import setup
+ setuptools_available = True
except ImportError:
from distutils.core import setup
+ setuptools_available = False
try:
# This will create an exe that needs Microsoft Visual C++ 2008
@@ -43,13 +45,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
params = py2exe_params
else:
params = {
- 'scripts': ['bin/youtube-dl'],
'data_files': [ # Installing system-wide would require sudo...
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/doc/youtube_dl', ['README.txt']),
('share/man/man1/', ['youtube-dl.1'])
]
}
+ if setuptools_available:
+ params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
+ else:
+ params['scripts'] = ['bin/youtube-dl']
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
@@ -63,6 +68,7 @@ setup(
' YouTube.com and other video sites.',
url='https://github.com/rg3/youtube-dl',
author='Ricardo Garcia',
+ author_email='ytdl@yt-dl.org',
maintainer='Philipp Hagemeister',
maintainer_email='phihag@phihag.de',
packages=['youtube_dl', 'youtube_dl.extractor'],
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/helper.py b/test/helper.py
index a2b468b50..d7bf7a828 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,38 +1,80 @@
+import errno
import io
+import hashlib
import json
import os.path
+import re
+import types
+import sys
import youtube_dl.extractor
-from youtube_dl import YoutubeDL, YoutubeDLHandler
-from youtube_dl.utils import (
- compat_cookiejar,
- compat_urllib_request,
-)
+from youtube_dl import YoutubeDL
+from youtube_dl.utils import preferredencoding
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
- parameters = json.load(pf)
+def global_setup():
+    """Set up youtube-dl's opener by calling _setup_opener(timeout=10)."""
+    youtube_dl._setup_opener(timeout=10)
+
+
+def get_params(override=None):
+    """Load parameters.json next to this file, updated with override."""
+    # The file is re-read on every call, so each caller gets its own dict
+    here = os.path.dirname(os.path.abspath(__file__))
+    with io.open(os.path.join(here, "parameters.json"), encoding='utf-8') as pf:
+        parameters = json.load(pf)
+    if override:
+        parameters.update(override)
+    return parameters
+
+
+def try_rm(filename):
+    """ Remove a file if it exists """
+    try:
+        os.remove(filename)
+    except OSError as ose:
+        # A missing file is fine, any other failure is propagated
+        if ose.errno != errno.ENOENT:
+            raise
+
+
+def report_warning(message):
+    """Write 'WARNING: <message>' to stderr, colored if stderr is a tty."""
+    # No color when os.name is 'nt'
+    colored = sys.stderr.isatty() and os.name != 'nt'
+    _msg_header = u'\033[0;33mWARNING:\033[0m' if colored else u'WARNING:'
+    output = u'%s %s\n' % (_msg_header, message)
+    # Byte streams (and Python 2) need an encoded string
+    if sys.version_info[0] < 3 or 'b' in getattr(sys.stderr, 'mode', ''):
+        output = output.encode(preferredencoding())
+    sys.stderr.write(output)
+
class FakeYDL(YoutubeDL):
- def __init__(self):
- self.result = []
+ def __init__(self, override=None):
# Different instances of the downloader can't share the same dictionary
# some test set the "sublang" parameter, which would break the md5 checks.
- self.params = dict(parameters)
- def to_screen(self, s):
+ params = get_params(override=override)
+ super(FakeYDL, self).__init__(params)
+ self.result = []
+
+ def to_screen(self, s, skip_eol=None):
print(s)
+
def trouble(self, s, tb=None):
raise Exception(s)
+
def download(self, x):
self.result.append(x)
+ def expect_warning(self, regex):
+ # Silence an expected warning matching a regex
+ old_report_warning = self.report_warning
+ def report_warning(self, message):
+ if re.match(regex, message): return
+ old_report_warning(message)
+ self.report_warning = types.MethodType(report_warning, self)
+
def get_testcases():
for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None)
@@ -42,3 +84,6 @@ def get_testcases():
for t in getattr(ie, '_TESTS', []):
t['name'] = type(ie).__name__[:-len('IE')]
yield t
+
+
+md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
new file mode 100644
index 000000000..58cf9c313
--- /dev/null
+++ b/test/test_YoutubeDL.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+"""Unit tests for the format selection and extra-info logic of YoutubeDL."""
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL
+
+
+class YDL(FakeYDL):
+    """FakeYDL that records processed info dicts and screen messages."""
+
+    def __init__(self, *args, **kwargs):
+        super(YDL, self).__init__(*args, **kwargs)
+        self.downloaded_info_dicts = []
+        self.msgs = []
+
+    def process_info(self, info_dict):
+        # Record the selected format instead of downloading it
+        self.downloaded_info_dicts.append(info_dict)
+
+    def to_screen(self, msg, skip_eol=None):
+        # Same signature as FakeYDL.to_screen, so that callers passing
+        # skip_eol do not fail with a TypeError
+        self.msgs.append(msg)
+
+
+class TestFormatSelection(unittest.TestCase):
+    """Check which formats process_ie_result hands over to process_info."""
+
+    def test_prefer_free_formats(self):
+        # Same resolution => download webm
+        ydl = YDL()
+        ydl.params['prefer_free_formats'] = True
+        formats = [
+            {u'ext': u'webm', u'height': 460},
+            {u'ext': u'mp4', u'height': 460},
+        ]
+        info_dict = {u'formats': formats, u'extractor': u'test'}
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'ext'], u'webm')
+
+        # Different resolution => download best quality (mp4)
+        ydl = YDL()
+        ydl.params['prefer_free_formats'] = True
+        formats = [
+            {u'ext': u'webm', u'height': 720},
+            {u'ext': u'mp4', u'height': 1080},
+        ]
+        info_dict[u'formats'] = formats
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'ext'], u'mp4')
+
+        # No prefer_free_formats => keep original formats order
+        ydl = YDL()
+        ydl.params['prefer_free_formats'] = False
+        formats = [
+            {u'ext': u'webm', u'height': 720},
+            {u'ext': u'flv', u'height': 720},
+        ]
+        info_dict[u'formats'] = formats
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'ext'], u'flv')
+
+    def test_format_limit(self):
+        formats = [
+            {u'format_id': u'meh', u'url': u'http://example.com/meh'},
+            {u'format_id': u'good', u'url': u'http://example.com/good'},
+            {u'format_id': u'great', u'url': u'http://example.com/great'},
+            {u'format_id': u'excellent', u'url': u'http://example.com/exc'},
+        ]
+        info_dict = {
+            u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
+
+        ydl = YDL()
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'format_id'], u'excellent')
+
+        ydl = YDL({'format_limit': 'good'})
+        self.assertEqual(ydl.params['format_limit'], 'good')
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'format_id'], u'good')
+
+        ydl = YDL({'format_limit': 'great', 'format': 'all'})
+        ydl.process_ie_result(info_dict)
+        self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
+        self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
+        self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
+        self.assertTrue('3' in ydl.msgs[0])
+
+        ydl = YDL()
+        ydl.params['format_limit'] = 'excellent'
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded[u'format_id'], u'excellent')
+
+    def test_format_selection(self):
+        formats = [
+            {u'format_id': u'35', u'ext': u'mp4'},
+            {u'format_id': u'45', u'ext': u'webm'},
+            {u'format_id': u'47', u'ext': u'webm'},
+            {u'format_id': u'2', u'ext': u'flv'},
+        ]
+        info_dict = {u'formats': formats, u'extractor': u'test'}
+
+        ydl = YDL({'format': u'20/47'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'47')
+
+        ydl = YDL({'format': u'20/71/worst'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'35')
+
+        ydl = YDL()
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'2')
+
+        ydl = YDL({'format': u'webm/mp4'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'47')
+
+        ydl = YDL({'format': u'3gp/40/mp4'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], u'35')
+
+    def test_add_extra_info(self):
+        test_dict = {
+            'extractor': 'Foo',
+        }
+        extra_info = {
+            'extractor': 'Bar',
+            'playlist': 'funny videos',
+        }
+        YDL.add_extra_info(test_dict, extra_info)
+        # Existing keys must be kept, missing ones must be added
+        self.assertEqual(test_dict['extractor'], 'Foo')
+        self.assertEqual(test_dict['playlist'], 'funny videos')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
new file mode 100644
index 000000000..d500c6edc
--- /dev/null
+++ b/test/test_age_restriction.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import global_setup, try_rm
+global_setup()
+
+
+from youtube_dl import YoutubeDL
+
+
+def _download_restricted(url, filename, age):
+    """ Returns true iff the file has been downloaded
+
+    skip_download and writeinfojson are set, so what is actually checked
+    is the existence of the .info.json file derived from `filename`.
+    """
+
+    params = {
+        'age_limit': age,
+        'skip_download': True,
+        'writeinfojson': True,
+        "outtmpl": "%(id)s.%(ext)s",
+    }
+    ydl = YoutubeDL(params)
+    ydl.add_default_info_extractors()
+    json_filename = filename + '.info.json'
+    try_rm(json_filename)
+    try:
+        ydl.download([url])
+        return os.path.exists(json_filename)
+    finally:
+        # Do not leave the file behind, even if the download raised
+        try_rm(json_filename)
+
+
+class TestAgeRestriction(unittest.TestCase):
+    def _assert_restricted(self, url, filename, age, old_age=None):
+        # Must be downloadable with old_age (None by default), but not with age
+        self.assertTrue(_download_restricted(url, filename, old_age))
+        self.assertFalse(_download_restricted(url, filename, age))
+
+    def test_youtube(self):
+        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+
+    def test_youporn(self):
+        self._assert_restricted(
+            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+            '505835.mp4', 2, old_age=25)
+
+    def test_pornotube(self):
+        self._assert_restricted(
+            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
+            '1689755.flv', 13)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index ff1c86efe..56e5f80e1 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -1,14 +1,20 @@
#!/usr/bin/env python
-import sys
-import unittest
-
# Allow direct execution
import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import get_testcases
+
+from youtube_dl.extractor import (
+ gen_extractors,
+ JustinTVIE,
+ YoutubeIE,
+)
-from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
-from helper import get_testcases
class TestAllURLsMatching(unittest.TestCase):
def setUp(self):
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index 83c65d57e..ba3580ea4 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -1,20 +1,16 @@
#!/usr/bin/env python
-import sys
-import unittest
-import json
-import io
-import hashlib
-
# Allow direct execution
import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup, md5
+global_setup()
+
from youtube_dl.extractor import DailymotionIE
-from youtube_dl.utils import *
-from helper import FakeYDL
-
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class TestDailymotionSubtitles(unittest.TestCase):
def setUp(self):
@@ -26,7 +22,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
- return info_dict[0]['subtitles']
+ return info_dict['subtitles']
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
@@ -45,15 +41,18 @@ class TestDailymotionSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
+ self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
+ self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
+ self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
diff --git a/test/test_download.py b/test/test_download.py
index 23a66254d..73379beb1 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -1,43 +1,39 @@
#!/usr/bin/env python
-import errno
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import (
+ get_params,
+ get_testcases,
+ global_setup,
+ try_rm,
+ md5,
+ report_warning
+)
+global_setup()
+
+
import hashlib
import io
-import os
import json
-import unittest
-import sys
import socket
-import binascii
-
-# Allow direct execution
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import youtube_dl.YoutubeDL
-from youtube_dl.utils import *
-
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
+from youtube_dl.utils import (
+ compat_str,
+ compat_urllib_error,
+ compat_HTTPError,
+ DownloadError,
+ ExtractorError,
+ UnavailableVideoError,
+)
RETRIES = 3
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-socket.setdefaulttimeout(10)
-
-def _try_rm(filename):
- """ Remove a file if it exists """
- try:
- os.remove(filename)
- except OSError as ose:
- if ose.errno != errno.ENOENT:
- raise
-
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
-
class YoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
self.to_stderr = self.to_screen
@@ -54,17 +50,12 @@ def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
-from helper import get_testcases
defs = get_testcases()
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
- parameters = json.load(pf)
-
class TestDownload(unittest.TestCase):
maxDiff = None
def setUp(self):
- self.parameters = parameters
self.defs = defs
### Dynamically generate tests
@@ -77,15 +68,17 @@ def generator(test_case):
if not ie._WORKING:
print_skipping('IE marked as not _WORKING')
return
- if 'playlist' not in test_case and not test_case['file']:
- print_skipping('No output file specified')
- return
+ if 'playlist' not in test_case:
+ info_dict = test_case.get('info_dict', {})
+ if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+ print_skipping('The output file cannot be know, the "file" '
+ 'key is missing or the info_dict is incomplete')
+ return
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
- params = self.parameters.copy()
- params.update(test_case.get('params', {}))
+ params = get_params(test_case.get('params', {}))
ydl = YoutubeDL(params)
ydl.add_default_info_extractors()
@@ -95,35 +88,47 @@ def generator(test_case):
finished_hook_called.add(status['filename'])
ydl.fd.add_progress_hook(_hook)
+ def get_tc_filename(tc):
+ return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
+
test_cases = test_case.get('playlist', [test_case])
- for tc in test_cases:
- _try_rm(tc['file'])
- _try_rm(tc['file'] + '.part')
- _try_rm(tc['file'] + '.info.json')
+ def try_rm_tcs_files():
+ for tc in test_cases:
+ tc_filename = get_tc_filename(tc)
+ try_rm(tc_filename)
+ try_rm(tc_filename + '.part')
+ try_rm(tc_filename + '.info.json')
+ try_rm_tcs_files()
try:
- for retry in range(1, RETRIES + 1):
+ try_num = 1
+ while True:
try:
ydl.download([test_case['url']])
except (DownloadError, ExtractorError) as err:
- if retry == RETRIES: raise
-
# Check if the exception is not a network related one
- if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise
- print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
+ if try_num == RETRIES:
+ report_warning(u'Failed due to network errors, skipping...')
+ return
+
+ print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
+
+ try_num += 1
else:
break
for tc in test_cases:
+ tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
- self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
- self.assertTrue(tc['file'] in finished_hook_called)
- self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
+ self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
+ self.assertTrue(tc_filename in finished_hook_called)
+ self.assertTrue(os.path.exists(tc_filename + '.info.json'))
if 'md5' in tc:
- md5_for_file = _file_md5(tc['file'])
+ md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
- with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
+ with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
@@ -143,11 +148,11 @@ def generator(test_case):
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
+ # Check for mandatory fields that are automatically set by YoutubeDL
+ for key in ['webpage_url', 'extractor', 'extractor_key']:
+ self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally:
- for tc in test_cases:
- _try_rm(tc['file'])
- _try_rm(tc['file'] + '.part')
- _try_rm(tc['file'] + '.info.json')
+ try_rm_tcs_files()
return test_template
diff --git a/test/test_playlists.py b/test/test_playlists.py
index c33511333..de1e8d88e 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -1,13 +1,16 @@
#!/usr/bin/env python
# encoding: utf-8
-import sys
-import unittest
-import json
# Allow direct execution
import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup
+global_setup()
+
from youtube_dl.extractor import (
DailymotionPlaylistIE,
@@ -16,10 +19,10 @@ from youtube_dl.extractor import (
UstreamChannelIE,
SoundcloudUserIE,
LivestreamIE,
+ NHLVideocenterIE,
+ BambuserChannelIE,
)
-from youtube_dl.utils import *
-from helper import FakeYDL
class TestPlaylists(unittest.TestCase):
def assertIsPlaylist(self, info):
@@ -74,5 +77,22 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4)
+ def test_nhl_videocenter(self):
+ dl = FakeYDL()
+ ie = NHLVideocenterIE(dl)
+ result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], u'999')
+ self.assertEqual(result['title'], u'Highlights')
+ self.assertEqual(len(result['entries']), 12)
+
+ def test_bambuser_channel(self):
+ dl = FakeYDL()
+ ie = BambuserChannelIE(dl)
+ result = ie.extract('http://bambuser.com/channel/pixelversity')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['title'], u'pixelversity')
+ self.assertTrue(len(result['entries']) >= 66)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index ff2e9885b..f3fbff042 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,14 +1,15 @@
#!/usr/bin/env python
-
-# Various small unit tests
-
-import sys
-import unittest
-import xml.etree.ElementTree
+# coding: utf-8
# Allow direct execution
import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+# Various small unit tests
+import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import (
@@ -20,6 +21,9 @@ from youtube_dl.utils import (
unified_strdate,
find_xpath_attr,
get_meta_content,
+ xpath_with_ns,
+ smuggle_url,
+ unsmuggle_url,
)
if sys.version_info < (3, 0):
@@ -141,5 +145,31 @@ class TestUtil(unittest.TestCase):
self.assertEqual(get_meta('description'), u'foo & bar')
self.assertEqual(get_meta('author'), 'Plato')
+ def test_xpath_with_ns(self):
+ testxml = u'''
+
+ The Author
+ http://server.com/download.mp3
+
+ '''
+ doc = xml.etree.ElementTree.fromstring(testxml)
+ find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
+ self.assertTrue(find('media:song') is not None)
+ self.assertEqual(find('media:song/media:author').text, u'The Author')
+ self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+
+ def test_smuggle_url(self):
+ data = {u"ö": u"ö", u"abc": [3]}
+ url = 'https://foo.bar/baz?x=y#a'
+ smug_url = smuggle_url(url, data)
+ unsmug_url, unsmug_data = unsmuggle_url(smug_url)
+ self.assertEqual(url, unsmug_url)
+ self.assertEqual(data, unsmug_data)
+
+ res_url, res_data = unsmuggle_url(url)
+ self.assertEqual(res_url, url)
+ self.assertEqual(res_data, None)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
new file mode 100644
index 000000000..35defb895
--- /dev/null
+++ b/test/test_write_annotations.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_params, global_setup, try_rm
+global_setup()
+
+
+import io
+
+import xml.etree.ElementTree
+
+import youtube_dl.YoutubeDL
+import youtube_dl.extractor
+
+
+class YoutubeDL(youtube_dl.YoutubeDL):
+    def __init__(self, *args, **kwargs):
+        super(YoutubeDL, self).__init__(*args, **kwargs)
+        self.to_stderr = self.to_screen
+
+
+params = get_params({
+    'writeannotations': True,
+    'skip_download': True,
+    'writeinfojson': False,
+    'format': 'flv',
+})
+
+
+TEST_ID = 'gr51aVj-mLg'
+ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
+EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
+
+
+class TestAnnotations(unittest.TestCase):
+    """Write the annotations file for TEST_ID and check the texts in it."""
+
+    def setUp(self):
+        # Clear old files
+        self.tearDown()
+
+    def test_info_json(self):
+        # Work on a copy: two annotations could have the same text.
+        expected = list(EXPECTED_ANNOTATIONS)
+        ie = youtube_dl.extractor.YoutubeIE()
+        ydl = YoutubeDL(params)
+        ydl.add_info_extractor(ie)
+        ydl.download([TEST_ID])
+        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
+        annoxml = None
+        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
+            annoxml = xml.etree.ElementTree.parse(annof)
+        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
+        root = annoxml.getroot()
+        self.assertEqual(root.tag, 'document')
+        annotationsTag = root.find('annotations')
+        self.assertTrue(annotationsTag is not None, 'Missing annotations element')
+        self.assertEqual(annotationsTag.tag, 'annotations')
+        annotations = annotationsTag.findall('annotation')
+
+        # Not all the annotations have TEXT children and the annotations are returned unsorted.
+        for a in annotations:
+            self.assertEqual(a.tag, 'annotation')
+            if a.get('type') == 'text':
+                textTag = a.find('TEXT')
+                self.assertTrue(textTag is not None, 'Text annotation without TEXT child')
+                text = textTag.text
+                self.assertTrue(text in expected)  # assertIn only added in python 2.7
+                # remove the first occurrence, there could be more than one annotation with the same text
+                expected.remove(text)
+        # We should have seen (and removed) all the expected annotation texts.
+        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.')
+
+    def tearDown(self):
+        try_rm(ANNOTATIONS_FILE)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py
index de6d5180f..a5b6f6972 100644
--- a/test/test_write_info_json.py
+++ b/test/test_write_info_json.py
@@ -1,37 +1,34 @@
#!/usr/bin/env python
# coding: utf-8
-import json
+# Allow direct execution
import os
import sys
import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# Allow direct execution
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import get_params, global_setup
+global_setup()
+
+
+import io
+import json
import youtube_dl.YoutubeDL
import youtube_dl.extractor
-from youtube_dl.utils import *
-PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
-
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
class YoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, *args, **kwargs):
super(YoutubeDL, self).__init__(*args, **kwargs)
self.to_stderr = self.to_screen
-with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
- params = json.load(pf)
-params['writeinfojson'] = True
-params['skip_download'] = True
-params['writedescription'] = True
+params = get_params({
+ 'writeinfojson': True,
+ 'skip_download': True,
+ 'writedescription': True,
+})
+
TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
@@ -42,6 +39,7 @@ This is a test video for youtube-dl.
For more information, contact phihag@phihag.de .'''
+
class TestInfoJSON(unittest.TestCase):
def setUp(self):
# Clear old files
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index dd9e292b0..4b7a7847b 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,20 +1,26 @@
#!/usr/bin/env python
-import sys
-import unittest
-import json
-
# Allow direct execution
import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
-from youtube_dl.utils import *
+from test.helper import FakeYDL, global_setup
+global_setup()
+
+
+from youtube_dl.extractor import (
+ YoutubeUserIE,
+ YoutubePlaylistIE,
+ YoutubeIE,
+ YoutubeChannelIE,
+ YoutubeShowIE,
+)
-from helper import FakeYDL
class TestYoutubeLists(unittest.TestCase):
- def assertIsPlaylist(self,info):
+ def assertIsPlaylist(self, info):
"""Make sure the info has '_type' set to 'playlist'"""
self.assertEqual(info['_type'], 'playlist')
@@ -27,6 +33,14 @@ class TestYoutubeLists(unittest.TestCase):
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
+ def test_youtube_playlist_noplaylist(self):
+ dl = FakeYDL()
+ dl.params['noplaylist'] = True
+ ie = YoutubePlaylistIE(dl)
+ result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+ self.assertEqual(result['_type'], 'url')
+ self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
+
def test_issue_673(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
@@ -92,7 +106,7 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubeShowIE(dl)
result = ie.extract('http://www.youtube.com/show/airdisasters')
- self.assertTrue(len(result) >= 4)
+ self.assertTrue(len(result) >= 3)
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 5007d9a16..5e1ff5eb0 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -1,14 +1,18 @@
#!/usr/bin/env python
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import global_setup
+global_setup()
+
+
import io
import re
import string
-import sys
-import unittest
-
-# Allow direct execution
-import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeIE
from youtube_dl.utils import compat_str, compat_urlretrieve
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index 168e6c66c..00430a338 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -1,76 +1,87 @@
#!/usr/bin/env python
-import sys
-import unittest
-import json
-import io
-import hashlib
-
# Allow direct execution
import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import FakeYDL, global_setup, md5
+global_setup()
+
from youtube_dl.extractor import YoutubeIE
-from youtube_dl.utils import *
-from helper import FakeYDL
-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class TestYoutubeSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.url = 'QRS8MkLhQmM'
+
def getInfoDict(self):
IE = YoutubeIE(self.DL)
info_dict = IE.extract(self.url)
return info_dict
+
def getSubtitles(self):
info_dict = self.getInfoDict()
- return info_dict[0]['subtitles']
+ return info_dict[0]['subtitles']
+
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
+
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+
def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
+
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'sbv'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
+
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
+
def test_youtube_list_subtitles(self):
+ self.DL.expect_warning(u'Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
+
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
+
def test_youtube_nosubtitles(self):
+ self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
+
def test_youtube_multiple_langs(self):
self.url = 'QRS8MkLhQmM'
self.DL.params['writesubtitles'] = True
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 000000000..ed01e3386
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,8 @@
+[tox]
+envlist = py26,py27,py33
+[testenv]
+deps =
+ nose
+ coverage
+commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
+ # test.test_download:TestDownload.test_NowVideo
diff --git a/youtube-dl b/youtube-dl
index d2401a2d8..ba664b481 100755
Binary files a/youtube-dl and b/youtube-dl differ
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index d6673fd3a..8ecabab1a 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -270,6 +270,7 @@ class FileDownloader(object):
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
+ test = self.params.get('test', False)
# Check for rtmpdump first
try:
@@ -291,6 +292,8 @@ class FileDownloader(object):
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
+ if test:
+ basic_args += ['--stop', '1']
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@@ -300,7 +303,7 @@ class FileDownloader(object):
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
retval = subprocess.call(args)
- while retval == 2 or retval == 1:
+ while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
time.sleep(5.0) # This seems to be needed
@@ -313,7 +316,7 @@ class FileDownloader(object):
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
- if retval == 0:
+ if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index 3ee1d3c58..13b56ede5 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -3,7 +3,14 @@ import subprocess
import sys
import time
-from .utils import *
+
+from .utils import (
+ compat_subprocess_get_DEVNULL,
+ encodeFilename,
+ PostProcessingError,
+ shell_quote,
+ subtitles_filename,
+)
class PostProcessor(object):
@@ -82,6 +89,8 @@ class FFmpegPostProcessor(PostProcessor):
+ opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate()
if p.returncode != 0:
@@ -177,7 +186,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
extension = self._preferredcodec
more_opts = []
if self._preferredquality is not None:
- if int(self._preferredquality) < 10:
+ # The opus codec doesn't support the -aq option
+ if int(self._preferredquality) < 10 and extension != 'opus':
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
@@ -467,3 +477,35 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, information
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+ def run(self, info):
+ metadata = {}
+ if info.get('title') is not None:
+ metadata['title'] = info['title']
+ if info.get('upload_date') is not None:
+ metadata['date'] = info['upload_date']
+ if info.get('uploader') is not None:
+ metadata['artist'] = info['uploader']
+ elif info.get('uploader_id') is not None:
+ metadata['artist'] = info['uploader_id']
+
+ if not metadata:
+ self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+ return True, info
+
+ filename = info['filepath']
+ ext = os.path.splitext(filename)[1][1:]
+ temp_filename = filename + u'.temp'
+
+ options = ['-c', 'copy']
+ for (name, value) in metadata.items():
+ options.extend(['-metadata', '%s="%s"' % (name, value)])
+ options.extend(['-f', ext])
+
+ self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return True, info
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 62982521e..d3562826e 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -3,6 +3,7 @@
from __future__ import absolute_import
+import errno
import io
import os
import re
@@ -70,6 +71,7 @@ class YoutubeDL(object):
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
+ writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
@@ -83,7 +85,13 @@ class YoutubeDL(object):
skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache.
-
+ noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Unsuitable videos for the given age are skipped.
+ download_archive: File name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded
+ again.
+
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
@@ -112,7 +120,7 @@ class YoutubeDL(object):
and not params['restrictfilenames']):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
- u'Assuming --restrict-filenames isnce file system encoding '
+ u'Assuming --restrict-filenames since file system encoding '
u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.')
params['restrictfilenames'] = True
@@ -208,10 +216,10 @@ class YoutubeDL(object):
If stderr is a tty file the 'WARNING:' will be colored
'''
if sys.stderr.isatty() and os.name != 'nt':
- _msg_header=u'\033[0;33mWARNING:\033[0m'
+ _msg_header = u'\033[0;33mWARNING:\033[0m'
else:
- _msg_header=u'WARNING:'
- warning_message=u'%s %s' % (_msg_header,message)
+ _msg_header = u'WARNING:'
+ warning_message = u'%s %s' % (_msg_header, message)
self.to_stderr(warning_message)
def report_error(self, message, tb=None):
@@ -226,19 +234,6 @@ class YoutubeDL(object):
error_message = u'%s %s' % (_msg_header, message)
self.trouble(error_message, tb)
- def slow_down(self, start_time, byte_counter):
- """Sleep if the download speed is over the rate limit."""
- rate_limit = self.params.get('ratelimit', None)
- if rate_limit is None or byte_counter == 0:
- return
- now = time.time()
- elapsed = now - start_time
- if elapsed <= 0.0:
- return
- speed = float(byte_counter) / elapsed
- if speed > rate_limit:
- time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
-
def report_writedescription(self, descfn):
""" Report that the description file is being written """
self.to_screen(u'[info] Writing video description to: ' + descfn)
@@ -251,6 +246,10 @@ class YoutubeDL(object):
""" Report that the metadata file has been written """
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
+ def report_writeannotations(self, annofn):
+ """ Report that the annotations file is being written. """
+ self.to_screen(u'[info] Writing video annotations to: ' + annofn)
+
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
@@ -273,16 +272,18 @@ class YoutubeDL(object):
autonumber_size = 5
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
template_dict['autonumber'] = autonumber_templ % self._num_downloads
- if template_dict['playlist_index'] is not None:
+ if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
- sanitize = lambda k,v: sanitize_filename(
+ sanitize = lambda k, v: sanitize_filename(
u'NA' if v is None else compat_str(v),
restricted=self.params.get('restrictfilenames'),
- is_id=(k==u'id'))
- template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
+ is_id=(k == u'id'))
+ template_dict = dict((k, sanitize(k, v))
+ for k, v in template_dict.items())
- filename = self.params['outtmpl'] % template_dict
+ tmpl = os.path.expanduser(self.params['outtmpl'])
+ filename = tmpl % template_dict
return filename
except KeyError as err:
self.report_error(u'Erroneous output template')
@@ -308,15 +309,28 @@ class YoutubeDL(object):
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ age_limit = self.params.get('age_limit')
+ if age_limit is not None:
+ if age_limit < info_dict.get('age_limit', 0):
+ return u'Skipping "' + title + '" because it is age restricted'
+ if self.in_download_archive(info_dict):
+ return (u'%(title)s has already been recorded in archive'
+ % info_dict)
return None
-
+
+ @staticmethod
+ def add_extra_info(info_dict, extra_info):
+ '''Set the keys from extra_info in info dict if they are missing'''
+ for key, value in extra_info.items():
+ info_dict.setdefault(key, value)
+
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
'''
-
+
if ie_key:
ies = [self.get_info_extractor(ie_key)]
else:
@@ -336,17 +350,17 @@ class YoutubeDL(object):
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
- for result in ie_result:
- result.update(extra_info)
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
- else:
- ie_result.update(extra_info)
- if 'extractor' not in ie_result:
- ie_result['extractor'] = ie.IE_NAME
- return self.process_ie_result(ie_result, download=download)
+ self.add_extra_info(ie_result,
+ {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url,
+ 'extractor_key': ie.ie_key(),
+ })
+ return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
@@ -358,7 +372,7 @@ class YoutubeDL(object):
raise
else:
self.report_error(u'no suitable InfoExtractor: %s' % url)
-
+
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""
Take the result of the ie(may be modified) and resolve all unresolved
@@ -370,14 +384,8 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video':
- ie_result.update(extra_info)
- if 'playlist' not in ie_result:
- # It isn't part of a playlist
- ie_result['playlist'] = None
- ie_result['playlist_index'] = None
- if download:
- self.process_info(ie_result)
- return ie_result
+ self.add_extra_info(ie_result, extra_info)
+ return self.process_video_result(ie_result)
elif result_type == 'url':
# We have to add extra_info to the results because it may be
# contained in a playlist
@@ -386,9 +394,10 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
+ self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
- self.to_screen(u'[download] Downloading playlist: %s' % playlist)
+ self.to_screen(u'[download] Downloading playlist: %s' % playlist)
playlist_results = []
@@ -406,17 +415,15 @@ class YoutubeDL(object):
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
- for i,entry in enumerate(entries,1):
- self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
+ for i, entry in enumerate(entries, 1):
+ self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
extra = {
- 'playlist': playlist,
- 'playlist_index': i + playliststart,
- }
- if not 'extractor' in entry:
- # We set the extractor, if it's an url it will be set then to
- # the new extractor, but if it's already a video we must make
- # sure it's present: see issue #877
- entry['extractor'] = ie_result['extractor']
+ 'playlist': playlist,
+ 'playlist_index': i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'extractor_key': ie_result['extractor_key'],
+ }
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@@ -425,16 +432,122 @@ class YoutubeDL(object):
return ie_result
elif result_type == 'compat_list':
def _fixup(r):
- r.setdefault('extractor', ie_result['extractor'])
+ self.add_extra_info(r,
+ {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'extractor_key': ie_result['extractor_key'],
+ })
return r
ie_result['entries'] = [
- self.process_ie_result(_fixup(r), download=download)
+ self.process_ie_result(_fixup(r), download, extra_info)
for r in ie_result['entries']
]
return ie_result
else:
raise Exception('Invalid result type: %s' % result_type)
+ def select_format(self, format_spec, available_formats):
+ if format_spec == 'best' or format_spec is None:
+ return available_formats[-1]
+ elif format_spec == 'worst':
+ return available_formats[0]
+ else:
+ extensions = [u'mp4', u'flv', u'webm', u'3gp']
+ if format_spec in extensions:
+ filter_f = lambda f: f['ext'] == format_spec
+ else:
+ filter_f = lambda f: f['format_id'] == format_spec
+ matches = list(filter(filter_f, available_formats))
+ if matches:
+ return matches[-1]
+ return None
+
+ def process_video_result(self, info_dict, download=True):
+ assert info_dict.get('_type', 'video') == 'video'
+
+ if 'playlist' not in info_dict:
+ # It isn't part of a playlist
+ info_dict['playlist'] = None
+ info_dict['playlist_index'] = None
+
+ # These extractors handle format selection themselves
+ if info_dict['extractor'] in [u'youtube', u'Youku']:
+ if download:
+ self.process_info(info_dict)
+ return info_dict
+
+ # We now pick which formats have to be downloaded
+ if info_dict.get('formats') is None:
+ # There's only one format available
+ formats = [info_dict]
+ else:
+ formats = info_dict['formats']
+
+ # We check that all the formats have the format and format_id fields
+ for (i, format) in enumerate(formats):
+ if format.get('format_id') is None:
+ format['format_id'] = compat_str(i)
+ if format.get('format') is None:
+ format['format'] = u'{id} - {res}{note}'.format(
+ id=format['format_id'],
+ res=self.format_resolution(format),
+ note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+ )
+ # Automatically determine file extension if missing
+ if 'ext' not in format:
+ format['ext'] = determine_ext(format['url'])
+
+ if self.params.get('listformats', None):
+ self.list_formats(info_dict)
+ return
+
+ format_limit = self.params.get('format_limit', None)
+ if format_limit:
+ formats = list(takewhile_inclusive(
+ lambda f: f['format_id'] != format_limit, formats
+ ))
+ if self.params.get('prefer_free_formats'):
+ def _free_formats_key(f):
+ try:
+ ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
+ except ValueError:
+ ext_ord = -1
+ # We only compare the extension if they have the same height and width
+ return (f.get('height'), f.get('width'), ext_ord)
+ formats = sorted(formats, key=_free_formats_key)
+
+ req_format = self.params.get('format', 'best')
+ if req_format is None:
+ req_format = 'best'
+ formats_to_download = []
+ # The -1 is for supporting YoutubeIE
+ if req_format in ('-1', 'all'):
+ formats_to_download = formats
+ else:
+ # We can accept formats requested in the format: 34/5/best, we pick
+ # the first that is available, starting from left
+ req_formats = req_format.split('/')
+ for rf in req_formats:
+ selected_format = self.select_format(rf, formats)
+ if selected_format is not None:
+ formats_to_download = [selected_format]
+ break
+ if not formats_to_download:
+ raise ExtractorError(u'requested format not available',
+ expected=True)
+
+ if download:
+ if len(formats_to_download) > 1:
+ self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
+ for format in formats_to_download:
+ new_info = dict(info_dict)
+ new_info.update(format)
+ self.process_info(new_info)
+ # We update the info dict with the best quality format (backwards compatibility)
+ info_dict.update(formats_to_download[-1])
+ return info_dict
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -472,9 +585,9 @@ class YoutubeDL(object):
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
- if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
+ if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
compat_print(info_dict['thumbnail'])
- if self.params.get('forcedescription', False) and 'description' in info_dict:
+ if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
compat_print(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
compat_print(filename)
@@ -508,10 +621,22 @@ class YoutubeDL(object):
self.report_error(u'Cannot write description file ' + descfn)
return
+ if self.params.get('writeannotations', False):
+ try:
+ annofn = filename + u'.annotations.xml'
+ self.report_writeannotations(annofn)
+ with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ annofile.write(info_dict['annotations'])
+ except (KeyError, TypeError):
+ self.report_warning(u'There are no annotations to write.')
+ except (OSError, IOError):
+ self.report_error(u'Cannot write annotations file: ' + annofn)
+ return
+
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
- if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+ if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
@@ -533,7 +658,7 @@ class YoutubeDL(object):
infofn = filename + u'.info.json'
self.report_writeinfojson(infofn)
try:
- json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
+ json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
write_json_file(json_info_dict, encodeFilename(infofn))
except (OSError, IOError):
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
@@ -578,6 +703,8 @@ class YoutubeDL(object):
self.report_error(u'postprocessing: %s' % str(err))
return
+ self.record_download_archive(info_dict)
+
def download(self, url_list):
"""Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template():
@@ -616,7 +743,7 @@ class YoutubeDL(object):
self.to_screen('[download] Writing metadata to the file\'s xattrs')
xattr_mapping = {
- 'user.xdg.referrer.url': 'referrer',
+ 'user.xdg.referrer.url': 'webpage_url',
# 'user.xdg.comment': 'description',
'user.dublincore.title': 'title',
'user.dublincore.date': 'upload_date',
@@ -648,7 +775,7 @@ class YoutubeDL(object):
keep_video = None
for pp in self._pps:
try:
- keep_video_wish,new_info = pp.run(info)
+ keep_video_wish, new_info = pp.run(info)
if keep_video_wish is not None:
if keep_video_wish:
keep_video = keep_video_wish
@@ -663,3 +790,61 @@ class YoutubeDL(object):
os.remove(encodeFilename(filename))
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
+
+ def in_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return False
+ vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ if line.strip() == vid_id:
+ return True
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+
+ def record_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return
+ vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + u'\n')
+
+ @staticmethod
+ def format_resolution(format, default='unknown'):
+ if format.get('_resolution') is not None:
+ return format['_resolution']
+ if format.get('height') is not None:
+ if format.get('width') is not None:
+ res = u'%sx%s' % (format['width'], format['height'])
+ else:
+ res = u'%sp' % format['height']
+ else:
+ res = default
+ return res
+
+ def list_formats(self, info_dict):
+ def line(format):
+ return (u'%-15s%-10s%-12s%s' % (
+ format['format_id'],
+ format['ext'],
+ self.format_resolution(format),
+ format.get('format_note', ''),
+ )
+ )
+
+ formats = info_dict.get('formats', [info_dict])
+ formats_s = list(map(line, formats))
+ if len(formats) > 1:
+ formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
+ formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
+
+ header_line = line({
+ 'format_id': u'format code', 'ext': u'extension',
+ '_resolution': u'resolution', 'format_note': u'note'})
+ self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
+ (info_dict['id'], header_line, u"\n".join(formats_s)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 3851fc0a6..48ffcbf8e 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -31,11 +31,13 @@ __authors__ = (
'Huarong Huo',
'Ismael Mejía',
'Steffan \'Ruirize\' James',
+ 'Andras Elso',
)
__license__ = 'Public Domain'
import codecs
+import collections
import getpass
import optparse
import os
@@ -45,17 +47,43 @@ import shlex
import socket
import subprocess
import sys
-import warnings
+import traceback
import platform
-from .utils import *
+from .utils import (
+ compat_cookiejar,
+ compat_print,
+ compat_str,
+ compat_urllib_request,
+ DateRange,
+ decodeOption,
+ determine_ext,
+ DownloadError,
+ get_cachedir,
+ make_HTTPS_handler,
+ MaxDownloadsReached,
+ platform_name,
+ preferredencoding,
+ SameFileError,
+ std_headers,
+ write_string,
+ YoutubeDLHandler,
+)
from .update import update_self
from .version import __version__
-from .FileDownloader import *
+from .FileDownloader import (
+ FileDownloader,
+)
from .extractor import gen_extractors
from .YoutubeDL import YoutubeDL
-from .PostProcessor import *
+from .PostProcessor import (
+ FFmpegMetadataPP,
+ FFmpegVideoConvertor,
+ FFmpegExtractAudioPP,
+ FFmpegEmbedSubtitlePP,
+)
+
def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes):
@@ -105,7 +133,7 @@ def parseOpts(overrideArguments=None):
def _hide_login_info(opts):
opts = list(opts)
- for private_opt in ['-p', '--password', '-u', '--username']:
+ for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
try:
i = opts.index(private_opt)
opts[i+1] = ''
@@ -151,6 +179,9 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
+ general.add_option('--abort-on-error',
+ action='store_false', dest='ignoreerrors',
+ help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
@@ -168,8 +199,8 @@ def parseOpts(overrideArguments=None):
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
- '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache',
- help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default')
+ '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
+ help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching')
@@ -187,6 +218,13 @@ def parseOpts(overrideArguments=None):
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
+ selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+ selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+ help='download only videos suitable for the given age',
+ default=None, type=int)
+ selection.add_option('--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not present in the archive file. Record all downloaded videos in it.')
authentication.add_option('-u', '--username',
@@ -200,7 +238,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format',
- action='store', dest='format', metavar='FORMAT',
+ action='store', dest='format', metavar='FORMAT', default='best',
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
@@ -232,11 +270,11 @@ def parseOpts(overrideArguments=None):
help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
downloader.add_option('-r', '--rate-limit',
- dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
+ dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
downloader.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
downloader.add_option('--buffer-size',
- dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
+ dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
downloader.add_option('--no-resize-buffer',
action='store_true', dest='noresizebuffer',
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
@@ -278,6 +316,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)')
+ verbosity.add_option('--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded pages to files in the current directory')
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -297,7 +338,10 @@ def parseOpts(overrideArguments=None):
help=('output filename template. Use %(title)s to get the title, '
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
'%(autonumber)s to get an automatically incremented number, '
- '%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), '
+ '%(ext)s for the filename extension, '
+ '%(format)s for the format description (like "22 - 1280x720" or "HD"),'
+ '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),'
+ '%(upload_date)s for the upload date (YYYYMMDD), '
'%(extractor)s for the provider (youtube, metacafe, etc), '
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
@@ -331,6 +375,9 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('--write-info-json',
action='store_true', dest='writeinfojson',
help='write video metadata to a .info.json file', default=False)
+ filesystem.add_option('--write-annotations',
+ action='store_true', dest='writeannotations',
+ help='write video annotations to a .annotation file', default=False)
filesystem.add_option('--write-thumbnail',
action='store_true', dest='writethumbnail',
help='write thumbnail image to disk', default=False)
@@ -350,6 +397,8 @@ def parseOpts(overrideArguments=None):
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)')
+ postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
+ help='add metadata to the files')
parser.add_option_group(general)
@@ -369,9 +418,13 @@ def parseOpts(overrideArguments=None):
else:
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
- userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
- userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+ userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
commandLineConf = sys.argv[1:]
@@ -436,27 +489,7 @@ def _real_main(argv=None):
all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls]
- # General configuration
- cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
- if opts.proxy is not None:
- if opts.proxy == '':
- proxies = {}
- else:
- proxies = {'http': opts.proxy, 'https': opts.proxy}
- else:
- proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
- if 'http' in proxies and 'https' not in proxies:
- proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
- https_handler = make_HTTPS_handler(opts)
- opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
- # Delete the default user-agent header, which would otherwise apply in
- # cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
- opener.addheaders =[]
- compat_urllib_request.install_opener(opener)
- socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
+ opener = _setup_opener(jar=jar, opts=opts)
extractors = gen_extractors()
@@ -473,6 +506,8 @@ def _real_main(argv=None):
if not ie._WORKING:
continue
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+ if desc is False:
+ continue
if hasattr(ie, 'SEARCH_KEY'):
_SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
_COUNTS = (u'', u'5', u'10', u'all')
@@ -599,11 +634,13 @@ def _real_main(argv=None):
'progress_with_newline': opts.progress_with_newline,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
+ 'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
'updatetime': opts.updatetime,
'writedescription': opts.writedescription,
+ 'writeannotations': opts.writeannotations,
'writeinfojson': opts.writeinfojson,
'writethumbnail': opts.writethumbnail,
'writesubtitles': opts.writesubtitles,
@@ -618,6 +655,7 @@ def _real_main(argv=None):
'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
+ 'write_pages': opts.write_pages,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
@@ -625,6 +663,8 @@ def _real_main(argv=None):
'daterange': date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
+ 'download_archive': opts.download_archive,
})
if opts.verbose:
@@ -644,11 +684,19 @@ def _real_main(argv=None):
except:
pass
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
- write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+
+ proxy_map = {}
+ for handler in opener.handlers:
+ if hasattr(handler, 'proxies'):
+ proxy_map.update(handler.proxies)
+ write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.add_default_info_extractors()
# PostProcessors
+ # Add the metadata pp first, the other pps will copy it
+ if opts.addmetadata:
+ ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
@@ -658,7 +706,7 @@ def _real_main(argv=None):
# Update version
if opts.update_self:
- update_self(ydl.to_screen, opts.verbose, sys.argv[0])
+ update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
@@ -677,11 +725,42 @@ def _real_main(argv=None):
if opts.cookiefile is not None:
try:
jar.save()
- except (IOError, OSError) as err:
+ except (IOError, OSError):
sys.exit(u'ERROR: unable to save cookie jar')
sys.exit(retcode)
+
+def _setup_opener(jar=None, opts=None, timeout=300):
+ if opts is None:
+ FakeOptions = collections.namedtuple(
+ 'FakeOptions', ['proxy', 'no_check_certificate'])
+ opts = FakeOptions(proxy=None, no_check_certificate=False)
+
+ cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
+ if opts.proxy is not None:
+ if opts.proxy == '':
+ proxies = {}
+ else:
+ proxies = {'http': opts.proxy, 'https': opts.proxy}
+ else:
+ proxies = compat_urllib_request.getproxies()
+ # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+ if 'http' in proxies and 'https' not in proxies:
+ proxies['https'] = proxies['http']
+ proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+ https_handler = make_HTTPS_handler(opts)
+ opener = compat_urllib_request.build_opener(
+ https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ compat_urllib_request.install_opener(opener)
+ socket.setdefaulttimeout(timeout)
+ return opener
+
+
def main(argv=None):
try:
_real_main(argv)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d1b7e5f99..888a91cce 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -2,8 +2,14 @@ from .appletrailers import AppleTrailersIE
from .addanime import AddAnimeIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
-from .arte import ArteTvIE
+from .arte import (
+ ArteTvIE,
+ ArteTVPlus7IE,
+ ArteTVCreativeIE,
+ ArteTVFutureIE,
+)
from .auengine import AUEngineIE
+from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
@@ -12,6 +18,7 @@ from .brightcove import BrightcoveIE
from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
+from .cinemassacre import CinemassacreIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
@@ -33,7 +40,9 @@ from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
from .exfm import ExfmIE
+from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
+from .faz import FazIE
from .fktv import (
FKTVIE,
FKTVPosteckeIE,
@@ -60,10 +69,12 @@ from .ign import IGNIE, OneUPIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
+from .internetvideoarchive import InternetVideoArchiveIE
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .kankan import KankanIE
+from .keezmovies import KeezMoviesIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
@@ -72,41 +83,52 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
+from .mofosex import MofosexIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
+from .myspace import MySpaceIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
+from .nhl import NHLIE, NHLVideocenterIE
+from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
+from .pornhub import PornHubIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
from .ro220 import Ro220IE
+from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
+from .rutube import RutubeIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import SouthParkStudiosIE
+from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
from .steam import SteamIE
+from .sztvhu import SztvHuIE
from .teamcoco import TeamcocoIE
+from .techtalks import TechTalksIE
from .ted import TEDIE
from .tf1 import TF1IE
from .thisav import ThisAVIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
+from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
@@ -117,16 +139,22 @@ from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .vice import ViceIE
+from .viddler import ViddlerIE
+from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
+from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
+from .vk import VKIE
from .wat import WatIE
+from .websurg import WeBSurgIE
from .weibo import WeiboIE
from .wimp import WimpIE
from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
+from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
@@ -135,11 +163,13 @@ from .youtube import (
YoutubeIE,
YoutubePlaylistIE,
YoutubeSearchIE,
+ YoutubeSearchDateIE,
YoutubeUserIE,
YoutubeChannelIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeRecommendedIE,
+ YoutubeTruncatedURLIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
)
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
index 82a785a19..b99d4b966 100644
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor):
IE_NAME = u'AddAnime'
_TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
- u'file': u'24MR3YO5SAS9.flv',
- u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+ u'file': u'24MR3YO5SAS9.mp4',
+ u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
u'info_dict': {
u"description": u"One Piece 606",
u"title": u"One Piece 606"
@@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor):
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
except ExtractorError as ee:
- if not isinstance(ee.cause, compat_HTTPError):
+ if not isinstance(ee.cause, compat_HTTPError) or \
+ ee.cause.code != 503:
raise
redir_webpage = ee.cause.read().decode('utf-8')
@@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor):
note=u'Confirming after redirect')
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
- webpage, u'video file URL')
+ formats = []
+ for format_id in ('normal', 'hq'):
+ rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
+ video_url = self._search_regex(rex, webpage, u'video file URLx',
+ fatal=False)
+ if not video_url:
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ })
+ if not formats:
+ raise ExtractorError(u'Cannot find any video format!')
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
return {
'_type': 'video',
'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
+ 'formats': formats,
'title': video_title,
'description': video_description
}
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 8b191c196..6d6237f8a 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -1,8 +1,10 @@
import re
import xml.etree.ElementTree
+import json
from .common import InfoExtractor
from ..utils import (
+ compat_urlparse,
determine_ext,
)
@@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor):
u"playlist": [
{
u"file": u"manofsteel-trailer4.mov",
- u"md5": u"11874af099d480cc09e103b189805d5f",
+ u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
u"info_dict": {
u"duration": 111,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
u"title": u"Trailer 4",
u"upload_date": u"20130523",
u"uploader_id": u"wb",
@@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-trailer3.mov",
- u"md5": u"07a0a262aae5afe68120eed61137ab34",
+ u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
u"info_dict": {
u"duration": 182,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
u"title": u"Trailer 3",
u"upload_date": u"20130417",
u"uploader_id": u"wb",
@@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-trailer.mov",
- u"md5": u"e401fde0813008e3307e54b6f384cff1",
+ u"md5": u"d0f1e1150989b9924679b441f3404d48",
u"info_dict": {
u"duration": 148,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
u"title": u"Trailer",
u"upload_date": u"20121212",
u"uploader_id": u"wb",
@@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-teaser.mov",
- u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+ u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
u"info_dict": {
u"duration": 93,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
u"title": u"Teaser",
u"upload_date": u"20120721",
u"uploader_id": u"wb",
@@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor):
]
}
+ _JSON_RE = r'iTunes.playURL\((.*?)\);'
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
movie = mobj.group('movie')
uploader_id = mobj.group('company')
- playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+ playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_snippet = self._download_webpage(playlist_url, movie)
- playlist_cleaned = re.sub(r'(?s)', u'', playlist_snippet)
+ playlist_cleaned = re.sub(r'(?s)', u'', playlist_snippet)
+ playlist_cleaned = re.sub(r'', r'', playlist_cleaned)
+ # The ' characters in the onClick attributes are not escaped, so the snippet
+ # cannot be parsed as-is with xml.etree.ElementTree.fromstring
+ # (example: http://trailers.apple.com/trailers/wb/gravity/)
+ def _clean_json(m):
+ return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+ playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
playlist_html = u'' + playlist_cleaned + u''
- size_cache = {}
-
doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = []
for li in doc.findall('./div/ul/li'):
- title = li.find('.//h3').text
+ on_click = li.find('.//a').attrib['onClick']
+ trailer_info_json = self._search_regex(self._JSON_RE,
+ on_click, u'trailer info')
+ trailer_info = json.loads(trailer_info_json)
+ title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src']
+ upload_date = trailer_info['posted'].replace('-', '')
- date_el = li.find('.//p')
- upload_date = None
- m = re.search(r':\s?(?P[0-9]{2})/(?P[0-9]{2})/(?P[0-9]{2})', date_el.text)
- if m:
- upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
- runtime_el = date_el.find('./br')
- m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+ runtime = trailer_info['runtime']
+ m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
duration = None
if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+ first_url = trailer_info['url']
+ trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
+ settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+ settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
+ settings = json.loads(settings_json)
+
formats = []
- for formats_el in li.findall('.//a'):
- if formats_el.attrib['class'] != 'OverlayPanel':
- continue
- target = formats_el.attrib['target']
-
- format_code = formats_el.text
- if 'Automatic' in format_code:
- continue
-
- size_q = formats_el.attrib['href']
- size_id = size_q.rpartition('#videos-')[2]
- if size_id not in size_cache:
- size_url = url + size_q
- sizepage_html = self._download_webpage(
- size_url, movie,
- note=u'Downloading size info %s' % size_id,
- errnote=u'Error while downloading size info %s' % size_id,
- )
- _doc = xml.etree.ElementTree.fromstring(sizepage_html)
- size_cache[size_id] = _doc
-
- sizepage_doc = size_cache[size_id]
- links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
- for vid_a in links:
- href = vid_a.get('href')
- if not href.endswith(target):
- continue
- detail_q = href.partition('#')[0]
- detail_url = url + '/' + detail_q
-
- m = re.match(r'includes/(?P[^/]+)/', detail_q)
- detail_id = m.group('detail_id')
-
- detail_html = self._download_webpage(
- detail_url, movie,
- note=u'Downloading detail %s %s' % (detail_id, size_id),
- errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
- )
- detail_doc = xml.etree.ElementTree.fromstring(detail_html)
- movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
- assert movie_link_el.get('class') == 'movieLink'
- movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
- ext = determine_ext(movie_link)
- assert ext == 'mov'
-
- formats.append({
- 'format': format_code,
- 'ext': ext,
- 'url': movie_link,
- })
+ for format in settings['metadata']['sizes']:
+ # The src is a file pointing to the real video file
+ format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+ formats.append({
+ 'url': format_url,
+ 'ext': determine_ext(format_url),
+ 'format': format['type'],
+ 'width': format['width'],
+ 'height': int(format['height']),
+ })
+ formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = {
'_type': 'video',
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 69b3b0ad7..e10c74c11 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
import re
import json
import xml.etree.ElementTree
@@ -7,15 +8,15 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
unified_strdate,
+ determine_ext,
+ get_element_by_id,
)
+# There are different sources of video in arte.tv, the extraction process
+# is different for each one. The videos usually expire in 7 days, so we can't
+# add tests.
+
class ArteTvIE(InfoExtractor):
- """
- There are two sources of video in arte.tv: videos.arte.tv and
- www.arte.tv/guide, the extraction process is different for each one.
- The videos expire in 7 days, so we can't add tests.
- """
- _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?Pfr|de)/.*-(?P.*?).html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?Pfr|de)/(?P.+?)/(?P.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
@@ -24,7 +25,7 @@ class ArteTvIE(InfoExtractor):
@classmethod
def suitable(cls, url):
- return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
+ return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
# TODO implement Live Stream
# from ..utils import compat_urllib_parse
@@ -55,14 +56,6 @@ class ArteTvIE(InfoExtractor):
# video_url = u'%s/%s' % (info.get('url'), info.get('path'))
def _real_extract(self, url):
- mobj = re.match(self._EMISSION_URL, url)
- if mobj is not None:
- lang = mobj.group('lang')
- # This is not a real id, it can be for example AJT for the news
- # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
- video_id = mobj.group('id')
- return self._extract_emission(url, video_id, lang)
-
mobj = re.match(self._VIDEOS_URL, url)
if mobj is not None:
id = mobj.group('id')
@@ -80,49 +73,6 @@ class ArteTvIE(InfoExtractor):
# self.extractLiveStream(url)
# return
- def _extract_emission(self, url, video_id, lang):
- """Extract from www.arte.tv/guide"""
- webpage = self._download_webpage(url, video_id)
- json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
-
- json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
- self.report_extraction(video_id)
- info = json.loads(json_info)
- player_info = info['videoJsonPlayer']
-
- info_dict = {'id': player_info['VID'],
- 'title': player_info['VTI'],
- 'description': player_info.get('VDE'),
- 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
- 'thumbnail': player_info['programImage'],
- 'ext': 'flv',
- }
-
- formats = player_info['VSR'].values()
- def _match_lang(f):
- # Return true if that format is in the language of the url
- if lang == 'fr':
- l = 'F'
- elif lang == 'de':
- l = 'A'
- regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
- return any(re.match(r, f['versionCode']) for r in regexes)
- # Some formats may not be in the same language as the url
- formats = filter(_match_lang, formats)
- # We order the formats by quality
- formats = sorted(formats, key=lambda f: int(f['height']))
- # Prefer videos without subtitles in the same language
- formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
- # Pick the best quality
- format_info = formats[-1]
- if format_info['mediaType'] == u'rtmp':
- info_dict['url'] = format_info['streamer']
- info_dict['play_path'] = 'mp4:' + format_info['url']
- else:
- info_dict['url'] = format_info['url']
-
- return info_dict
-
def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
@@ -172,3 +122,130 @@ class ArteTvIE(InfoExtractor):
'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
}
+
+
+class ArteTVPlus7IE(InfoExtractor):
+ IE_NAME = u'arte.tv:+7'
+ _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+
+ @classmethod
+ def _extract_url_info(cls, url):
+ mobj = re.match(cls._VALID_URL, url)
+ lang = mobj.group('lang')
+ # This is not a real id, it can be for example AJT for the news
+ # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+ video_id = mobj.group('id')
+ return video_id, lang
+
+ def _real_extract(self, url):
+ video_id, lang = self._extract_url_info(url)
+ webpage = self._download_webpage(url, video_id)
+ return self._extract_from_webpage(webpage, video_id, lang)
+
+ def _extract_from_webpage(self, webpage, video_id, lang):
+ json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+
+ json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
+ self.report_extraction(video_id)
+ info = json.loads(json_info)
+ player_info = info['videoJsonPlayer']
+
+ info_dict = {
+ 'id': player_info['VID'],
+ 'title': player_info['VTI'],
+ 'description': player_info.get('VDE'),
+ 'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]),
+ 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
+ }
+
+ all_formats = player_info['VSR'].values()
+ # Some formats use the m3u8 protocol
+ all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
+ def _match_lang(f):
+ if f.get('versionCode') is None:
+ return True
+ # Return true if that format is in the language of the url
+ if lang == 'fr':
+ l = 'F'
+ elif lang == 'de':
+ l = 'A'
+ regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
+ return any(re.match(r, f['versionCode']) for r in regexes)
+ # Some formats may not be in the same language as the url
+ formats = filter(_match_lang, all_formats)
+ formats = list(formats) # in python3 filter returns an iterator
+ if not formats:
+ # Some videos are only available in the 'Originalversion'
+ # they aren't tagged as being in French or German
+ if all(f['versionCode'] == 'VO' for f in all_formats):
+ formats = all_formats
+ else:
+ raise ExtractorError(u'The formats list is empty')
+ # We order the formats by quality
+ if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
+ sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
+ else:
+ sort_key = lambda f: int(f.get('height',-1))
+ formats = sorted(formats, key=sort_key)
+ # Prefer videos without subtitles in the same language
+ formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
+ # Build the format dict for each remaining format
+ def _format(format_info):
+ quality = format_info['quality']
+ m_quality = re.match(r'\w*? - (\d*)p', quality)
+ if m_quality is not None:
+ quality = m_quality.group(1)
+ if format_info.get('versionCode') is not None:
+ format_id = u'%s-%s' % (quality, format_info['versionCode'])
+ else:
+ format_id = quality
+ info = {
+ 'format_id': format_id,
+ 'format_note': format_info.get('versionLibelle'),
+ 'width': format_info.get('width'),
+ 'height': format_info.get('height'),
+ }
+ if format_info['mediaType'] == u'rtmp':
+ info['url'] = format_info['streamer']
+ info['play_path'] = 'mp4:' + format_info['url']
+ info['ext'] = 'flv'
+ else:
+ info['url'] = format_info['url']
+ info['ext'] = determine_ext(info['url'])
+ return info
+ info_dict['formats'] = [_format(f) for f in formats]
+
+ return info_dict
+
+
+# It also uses the arte_vp_url url from the webpage to extract the information
+class ArteTVCreativeIE(ArteTVPlus7IE):
+ IE_NAME = u'arte.tv:creative'
+ _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
+
+ _TEST = {
+ u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
+ u'file': u'050489-002.mp4',
+ u'info_dict': {
+ u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design',
+ },
+ }
+
+
+class ArteTVFutureIE(ArteTVPlus7IE):
+ IE_NAME = u'arte.tv:future'
+ _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
+
+ _TEST = {
+ u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
+ u'file': u'050940-003.mp4',
+ u'info_dict': {
+ u'title': u'Les champignons au secours de la planète',
+ },
+ }
+
+ def _real_extract(self, url):
+ anchor_id, lang = self._extract_url_info(url)
+ webpage = self._download_webpage(url, anchor_id)
+ row = get_element_by_id(anchor_id, webpage)
+ return self._extract_from_webpage(row, anchor_id, lang)
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
new file mode 100644
index 000000000..f3b36f473
--- /dev/null
+++ b/youtube_dl/extractor/bambuser.py
@@ -0,0 +1,80 @@
+import re
+import json
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_request,
+)
+
+
+class BambuserIE(InfoExtractor):
+ IE_NAME = u'bambuser'
+ _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
+ _API_KEY = '005f64509e19a868399060af746a00aa'
+
+ _TEST = {
+ u'url': u'http://bambuser.com/v/4050584',
+ u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
+ u'info_dict': {
+ u'id': u'4050584',
+ u'ext': u'flv',
+ u'title': u'Education engineering days - lightning talks',
+ u'duration': 3741,
+ u'uploader': u'pixelversity',
+ u'uploader_id': u'344706',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
+ '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
+ info_json = self._download_webpage(info_url, video_id)
+ info = json.loads(info_json)['result']
+
+ return {
+ 'id': video_id,
+ 'title': info['title'],
+ 'url': info['url'],
+ 'thumbnail': info.get('preview'),
+ 'duration': int(info['length']),
+ 'view_count': int(info['views_total']),
+ 'uploader': info['username'],
+ 'uploader_id': info['uid'],
+ }
+
+
+class BambuserChannelIE(InfoExtractor):
+ IE_NAME = u'bambuser:channel'
+ _VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
+ # The maximum number we can get with each request
+ _STEP = 50
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user = mobj.group('user')
+ urls = []
+ last_id = ''
+ for i in itertools.count(1):
+ req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
+ '&sort=created&access_mode=0%2C1%2C2&limit={count}'
+ '&method=broadcast&format=json&vid_older_than={last}'
+ ).format(user=user, count=self._STEP, last=last_id)
+ req = compat_urllib_request.Request(req_url)
+ # Without setting this header, we wouldn't get any result
+ req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
+ info_json = self._download_webpage(req, user,
+ u'Downloading page %d' % i)
+ results = json.loads(info_json)['result']
+ if len(results) == 0:
+ break
+ last_id = results[-1]['vid']
+ urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
+
+ return {
+ '_type': 'playlist',
+ 'title': user,
+ 'entries': urls,
+ }
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 08b28c994..493504f75 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor):
ext = umobj.group(1)
info = {
- 'id': data['item_id'],
+ 'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 558b3d009..0d9b87a34 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -23,7 +23,7 @@ class BrightcoveIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
- u'md5': u'9e80619e0a94663f0bdc849b4566af19',
+ u'md5': u'8eccab865181d29ec2958f32a6a754f5',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
@@ -49,6 +49,13 @@ class BrightcoveIE(InfoExtractor):
Build a Brightcove url from a xml string containing
"""
+
+ # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
+ object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
+ lambda m: m.group(1) + '/>', object_str)
+ # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
+ object_str = object_str.replace(u'<--', u'