diff --git a/.gitignore b/.gitignore
index 24fdb3626..7dd0ad09b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ updates_key.pem
*.mp4
*.part
test/testdata
+.tox
diff --git a/README.md b/README.md
index 14d62b189..8824daee2 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,9 @@ which means you can modify it, redistribute it or use it however you like.
--datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date
--no-playlist download only the currently playing video
+ --age-limit YEARS download only videos suitable for the given age
+ --download-archive FILE Download only videos not present in the archive
+ file. Record all downloaded videos in it.
## Download Options:
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
diff --git a/test/helper.py b/test/helper.py
index a2b468b50..ad1b74dd3 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,6 +1,9 @@
+import errno
import io
import json
import os.path
+import re
+import types
import youtube_dl.extractor
from youtube_dl import YoutubeDL, YoutubeDLHandler
@@ -9,30 +12,47 @@ from youtube_dl.utils import (
compat_urllib_request,
)
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
+youtube_dl._setup_opener(timeout=10)
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
+
+def try_rm(filename):
+ """ Remove a file if it exists """
+ try:
+ os.remove(filename)
+ except OSError as ose:
+ if ose.errno != errno.ENOENT:
+ raise
+
+
class FakeYDL(YoutubeDL):
def __init__(self):
- self.result = []
# Different instances of the downloader can't share the same dictionary
# some test set the "sublang" parameter, which would break the md5 checks.
- self.params = dict(parameters)
- def to_screen(self, s):
+ params = dict(parameters)
+ super(FakeYDL, self).__init__(params)
+ self.result = []
+
+ def to_screen(self, s, skip_eol=None):
print(s)
+
def trouble(self, s, tb=None):
raise Exception(s)
+
def download(self, x):
self.result.append(x)
+ def expect_warning(self, regex):
+ # Silence an expected warning matching a regex
+ old_report_warning = self.report_warning
+ def report_warning(self, message):
+ if re.match(regex, message): return
+ old_report_warning(message)
+ self.report_warning = types.MethodType(report_warning, self)
+
def get_testcases():
for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None)
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
new file mode 100644
index 000000000..943f9a315
--- /dev/null
+++ b/test/test_age_restriction.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl import YoutubeDL
+from helper import try_rm
+
+
+def _download_restricted(url, filename, age):
+ """ Returns true iff the file has been downloaded """
+
+ params = {
+ 'age_limit': age,
+ 'skip_download': True,
+ 'writeinfojson': True,
+ "outtmpl": "%(id)s.%(ext)s",
+ }
+ ydl = YoutubeDL(params)
+ ydl.add_default_info_extractors()
+ json_filename = filename + '.info.json'
+ try_rm(json_filename)
+ ydl.download([url])
+ res = os.path.exists(json_filename)
+ try_rm(json_filename)
+ return res
+
+
+class TestAgeRestriction(unittest.TestCase):
+ def _assert_restricted(self, url, filename, age, old_age=None):
+ self.assertTrue(_download_restricted(url, filename, old_age))
+ self.assertFalse(_download_restricted(url, filename, age))
+
+ def test_youtube(self):
+ self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+
+ def test_youporn(self):
+ self._assert_restricted(
+ 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ '505835.mp4', 2, old_age=25)
+
+ def test_pornotube(self):
+ self._assert_restricted(
+ 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
+ '1689755.flv', 13)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index 83c65d57e..ed2ad311d 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -2,8 +2,6 @@
import sys
import unittest
-import json
-import io
import hashlib
# Allow direct execution
@@ -45,15 +43,18 @@ class TestDailymotionSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
+ self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
+ self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
+ self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
diff --git a/test/test_download.py b/test/test_download.py
index 23a66254d..fdf59bb5c 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -1,6 +1,5 @@
#!/usr/bin/env python
-import errno
import hashlib
import io
import os
@@ -20,22 +19,6 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para
RETRIES = 3
-# General configuration (from __init__, not very elegant...)
-jar = compat_cookiejar.CookieJar()
-cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
-proxy_handler = compat_urllib_request.ProxyHandler()
-opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
-compat_urllib_request.install_opener(opener)
-socket.setdefaulttimeout(10)
-
-def _try_rm(filename):
- """ Remove a file if it exists """
- try:
- os.remove(filename)
- except OSError as ose:
- if ose.errno != errno.ENOENT:
- raise
-
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class YoutubeDL(youtube_dl.YoutubeDL):
@@ -54,7 +37,8 @@ def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
-from helper import get_testcases
+import helper # Set up remaining global configuration
+from helper import get_testcases, try_rm
defs = get_testcases()
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
@@ -97,9 +81,9 @@ def generator(test_case):
test_cases = test_case.get('playlist', [test_case])
for tc in test_cases:
- _try_rm(tc['file'])
- _try_rm(tc['file'] + '.part')
- _try_rm(tc['file'] + '.info.json')
+ try_rm(tc['file'])
+ try_rm(tc['file'] + '.part')
+ try_rm(tc['file'] + '.info.json')
try:
for retry in range(1, RETRIES + 1):
try:
@@ -145,9 +129,9 @@ def generator(test_case):
self.assertTrue(key in info_dict.keys() and info_dict[key])
finally:
for tc in test_cases:
- _try_rm(tc['file'])
- _try_rm(tc['file'] + '.part')
- _try_rm(tc['file'] + '.info.json')
+ try_rm(tc['file'])
+ try_rm(tc['file'] + '.part')
+ try_rm(tc['file'] + '.info.json')
return test_template
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index 168e6c66c..f9b0c1ad0 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -2,8 +2,6 @@
import sys
import unittest
-import json
-import io
import hashlib
# Allow direct execution
@@ -56,6 +54,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
+ self.DL.expect_warning(u'Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
@@ -66,6 +65,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
+ self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 000000000..53b461fdb
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,5 @@
+[tox]
+envlist = py26,py27,py33
+[testenv]
+deps = nose
+commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2503fd09b..073a3837c 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -3,6 +3,7 @@
from __future__ import absolute_import
+import errno
import io
import os
import re
@@ -84,6 +85,11 @@ class YoutubeDL(object):
cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Videos unsuitable for the given age are skipped.
+ download_archive: File name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded
+ again.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -309,6 +315,13 @@ class YoutubeDL(object):
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ age_limit = self.params.get('age_limit')
+ if age_limit is not None:
+ if age_limit < info_dict.get('age_limit', 0):
+ return u'Skipping "' + title + '" because it is age restricted'
+ if self.in_download_archive(info_dict):
+ return (u'%(title)s has already been recorded in archive'
+ % info_dict)
return None
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -578,6 +591,8 @@ class YoutubeDL(object):
self.report_error(u'postprocessing: %s' % str(err))
return
+ self.record_download_archive(info_dict)
+
def download(self, url_list):
"""Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template():
@@ -617,3 +632,26 @@ class YoutubeDL(object):
os.remove(encodeFilename(filename))
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
+
+ def in_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return False
+ vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ if line.strip() == vid_id:
+ return True
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+
+ def record_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return
+ vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + u'\n')
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index cbb6c92c2..5cd708886 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -36,6 +36,7 @@ __authors__ = (
__license__ = 'Public Domain'
import codecs
+import collections
import getpass
import optparse
import os
@@ -188,6 +189,12 @@ def parseOpts(overrideArguments=None):
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+ selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+ help='download only videos suitable for the given age',
+ default=None, type=int)
+ selection.add_option('--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not present in the archive file. Record all downloaded videos in it.')
authentication.add_option('-u', '--username',
@@ -445,27 +452,7 @@ def _real_main(argv=None):
all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls]
- # General configuration
- cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
- if opts.proxy is not None:
- if opts.proxy == '':
- proxies = {}
- else:
- proxies = {'http': opts.proxy, 'https': opts.proxy}
- else:
- proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
- if 'http' in proxies and 'https' not in proxies:
- proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
- https_handler = make_HTTPS_handler(opts)
- opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
- # Delete the default user-agent header, which would otherwise apply in
- # cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
- opener.addheaders =[]
- compat_urllib_request.install_opener(opener)
- socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
+ opener = _setup_opener(jar=jar, opts=opts)
extractors = gen_extractors()
@@ -482,6 +469,8 @@ def _real_main(argv=None):
if not ie._WORKING:
continue
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+ if desc is False:
+ continue
if hasattr(ie, 'SEARCH_KEY'):
_SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
_COUNTS = (u'', u'5', u'10', u'all')
@@ -635,6 +624,8 @@ def _real_main(argv=None):
'daterange': date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
+ 'download_archive': opts.download_archive,
})
if opts.verbose:
@@ -654,7 +645,12 @@ def _real_main(argv=None):
except:
pass
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
- write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+
+ proxy_map = {}
+ for handler in opener.handlers:
+ if hasattr(handler, 'proxies'):
+ proxy_map.update(handler.proxies)
+ write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.add_default_info_extractors()
@@ -692,6 +688,37 @@ def _real_main(argv=None):
sys.exit(retcode)
+
+def _setup_opener(jar=None, opts=None, timeout=300):
+ if opts is None:
+ FakeOptions = collections.namedtuple(
+ 'FakeOptions', ['proxy', 'no_check_certificate'])
+ opts = FakeOptions(proxy=None, no_check_certificate=False)
+
+ cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
+ if opts.proxy is not None:
+ if opts.proxy == '':
+ proxies = {}
+ else:
+ proxies = {'http': opts.proxy, 'https': opts.proxy}
+ else:
+ proxies = compat_urllib_request.getproxies()
+ # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+ if 'http' in proxies and 'https' not in proxies:
+ proxies['https'] = proxies['http']
+ proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+ https_handler = make_HTTPS_handler(opts)
+ opener = compat_urllib_request.build_opener(
+ https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ compat_urllib_request.install_opener(opener)
+ socket.setdefaulttimeout(timeout)
+ return opener
+
+
def main(argv=None):
try:
_real_main(argv)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d1b7e5f99..c01de6b5e 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -117,6 +117,7 @@ from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .vice import ViceIE
+from .viddler import ViddlerIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
@@ -140,6 +141,7 @@ from .youtube import (
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeRecommendedIE,
+ YoutubeTruncatedURLIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
)
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 08b28c994..493504f75 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor):
ext = umobj.group(1)
info = {
- 'id': data['item_id'],
+ 'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 69cdcdc1b..2a5a85dc6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -54,6 +54,7 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
+ age_limit: Age restriction for the video, as an integer (years)
formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file
@@ -318,6 +319,15 @@ class InfoExtractor(object):
self._og_regex('video')],
html, name, **kargs)
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix).*?(.*?)', r'(.+?)'],
+ r'''(?x)
+ ([0-9]{4}-[0-9]{2}-[0-9]{2})''',
webpage, u'upload date', fatal=False)
if upload_date:
# Convert timestring to a format suitable for filename
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index ae2e37a70..6bb54b932 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -6,6 +6,7 @@ import xml.etree.ElementTree
from .common import InfoExtractor
+
class JeuxVideoIE(InfoExtractor):
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
@@ -23,25 +24,29 @@ class JeuxVideoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, title)
- m_download = re.search(r'', webpage)
-
- xml_link = m_download.group(1)
+ xml_link = self._html_search_regex(
+ r'',
+ webpage, u'config URL')
- id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
+ video_id = self._search_regex(
+ r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
+ xml_link, u'video ID')
- xml_config = self._download_webpage(xml_link, title,
- 'Downloading XML config')
+ xml_config = self._download_webpage(
+ xml_link, title, u'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
- info = re.search(r'(.*?)',
- xml_config, re.MULTILINE|re.DOTALL).group(1)
- info = json.loads(info)['versions'][0]
+ info_json = self._search_regex(
+ r'(?sm)(.*?)',
+ xml_config, u'JSON information')
+ info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']
- return {'id': id,
- 'title' : config.find('titre_video').text,
- 'ext' : 'mp4',
- 'url' : video_url,
- 'description': self._og_search_description(webpage),
- 'thumbnail': config.find('image').text,
- }
+ return {
+ 'id': video_id,
+ 'title': config.find('titre_video').text,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': config.find('image').text,
+ }
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py
index add76a11e..5d770ec28 100644
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):
VIDEO_UPLOADED_RE = r'Added (?P
[0-9\/]+) by'
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
if upload_date: upload_date = unified_strdate(upload_date)
+ age_limit = self._rta_search(webpage)
info = {'id': video_id,
'url': video_url,
@@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):
'upload_date': upload_date,
'title': video_title,
'ext': 'flv',
- 'format': 'flv'}
+ 'format': 'flv',
+ 'age_limit': age_limit}
return [info]
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index bb19b898a..365aade56 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -30,9 +30,14 @@ class RedTubeIE(InfoExtractor):
r'(.+?)
',
webpage, u'title')
+ # No self-labeling, but they describe themselves as
+ # "Home of Videos Porno"
+ age_limit = 18
+
return {
- 'id': video_id,
- 'url': video_url,
- 'ext': video_extension,
- 'title': video_title,
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ 'age_limit': age_limit,
}
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
new file mode 100644
index 000000000..12c84a985
--- /dev/null
+++ b/youtube_dl/extractor/viddler.py
@@ -0,0 +1,64 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+)
+
+
+class ViddlerIE(InfoExtractor):
+ _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
+ _TEST = {
+ u"url": u"http://www.viddler.com/v/43903784",
+ u'file': u'43903784.mp4',
+ u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
+ u'info_dict': {
+ u"title": u"Video Made Easy",
+ u"uploader": u"viddler",
+ u"duration": 100.89,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ embed_url = mobj.group('domain') + u'/embed/' + video_id
+ webpage = self._download_webpage(embed_url, video_id)
+
+ video_sources_code = self._search_regex(
+ r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
+ video_sources = json.loads(video_sources_code.replace("'", '"'))
+
+ formats = [{
+ 'url': video_url,
+ 'format': format_id,
+ } for video_url, format_id in video_sources.items()]
+
+ title = self._html_search_regex(
+ r"title\s*:\s*'([^']*)'", webpage, u'title')
+ uploader = self._html_search_regex(
+ r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
+ duration_s = self._html_search_regex(
+ r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
+ duration = float(duration_s) if duration_s else None
+ thumbnail = self._html_search_regex(
+ r"thumbnail\s*:\s*'([^']*)'",
+ webpage, u'thumbnail', fatal=False)
+
+ info = {
+ '_type': 'video',
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+ # TODO: Remove when #980 has been merged
+ info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
+ info.update(info['formats'][-1])
+
+ return info
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 4a7d82b7a..cea29f035 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -17,7 +17,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)(?:[?].*)?$'
+ _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index c85fd4b5a..b1f93dd1b 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
+ age_limit = self._rta_search(webpage)
# Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):
'ext': extension,
'format': format,
'thumbnail': thumbnail,
- 'description': video_description
+ 'description': video_description,
+ 'age_limit': age_limit,
})
if self._downloader.params.get('listformats', None):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1101011ea..35310b39f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1250,9 +1250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return url_map
def _real_extract(self, url):
- if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
- self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
-
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
@@ -1495,7 +1492,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
- 'duration': video_duration
+ 'duration': video_duration,
+ 'age_limit': 18 if age_gate else 0,
})
return results
@@ -1636,7 +1634,7 @@ class YoutubeChannelIE(InfoExtractor):
class YoutubeUserIE(InfoExtractor):
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!watch(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1829,3 +1827,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
return self.url_result(playlist_id, 'YoutubePlaylist')
+
+
+class YoutubeTruncatedURLIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_url'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$'
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ u'Did you forget to quote the URL? Remember that & is a meta '
+ u'character in most shells, so you want to put the URL in quotes, '
+ u'like youtube-dl '
+ u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\''
+ u' (or simply youtube-dl BaW_jenozKc ).',
+ expected=True)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index f5f9cde99..de2654762 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -175,7 +175,7 @@ def compat_ord(c):
compiled_regex_type = type(re.compile(''))
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@@ -830,3 +830,99 @@ def get_cachedir(params={}):
cache_root = os.environ.get('XDG_CACHE_HOME',
os.path.expanduser('~/.cache'))
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+ import ctypes.wintypes
+ import msvcrt
+
+ class OVERLAPPED(ctypes.Structure):
+ _fields_ = [
+ ('Internal', ctypes.wintypes.LPVOID),
+ ('InternalHigh', ctypes.wintypes.LPVOID),
+ ('Offset', ctypes.wintypes.DWORD),
+ ('OffsetHigh', ctypes.wintypes.DWORD),
+ ('hEvent', ctypes.wintypes.HANDLE),
+ ]
+
+ kernel32 = ctypes.windll.kernel32
+ LockFileEx = kernel32.LockFileEx
+ LockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwFlags
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ LockFileEx.restype = ctypes.wintypes.BOOL
+ UnlockFileEx = kernel32.UnlockFileEx
+ UnlockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ UnlockFileEx.restype = ctypes.wintypes.BOOL
+ whole_low = 0xffffffff
+ whole_high = 0x7fffffff
+
+ def _lock_file(f, exclusive):
+ overlapped = OVERLAPPED()
+ overlapped.Offset = 0
+ overlapped.OffsetHigh = 0
+ overlapped.hEvent = 0
+ f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+ def _unlock_file(f):
+ assert f._lock_file_overlapped_p
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not UnlockFileEx(handle, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+ import fcntl
+
+ def _lock_file(f, exclusive):
+ fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+ def _unlock_file(f):
+ fcntl.lockf(f, fcntl.LOCK_UN)
+
+
+class locked_file(object):
+ def __init__(self, filename, mode, encoding=None):
+ assert mode in ['r', 'a', 'w']
+ self.f = io.open(filename, mode, encoding=encoding)
+ self.mode = mode
+
+ def __enter__(self):
+ exclusive = self.mode != 'r'
+ try:
+ _lock_file(self.f, exclusive)
+ except IOError:
+ self.f.close()
+ raise
+ return self
+
+ def __exit__(self, etype, value, traceback):
+ try:
+ _unlock_file(self.f)
+ finally:
+ self.f.close()
+
+ def __iter__(self):
+ return iter(self.f)
+
+ def write(self, *args):
+ return self.f.write(*args)
+
+ def read(self, *args):
+ return self.f.read(*args)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index e773e82da..8b4f03308 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.10.04'
+__version__ = '2013.10.07'