diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2d80d45b6..e3de48eb5 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21.2 +[debug] youtube-dl version 2016.05.30.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.gitignore b/.gitignore index d5f216b5f..a802c75a1 100644 --- a/.gitignore +++ b/.gitignore @@ -28,12 +28,16 @@ updates_key.pem *.mp4 *.m4a *.m4v +*.mp3 *.part *.swp test/testdata test/local_parameters.json .tox youtube-dl.zsh + +# IntelliJ related files .idea -.idea/* +*.iml + tmp/ diff --git a/.travis.yml b/.travis.yml index 998995845..136c339f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ script: nosetests test --verbose notifications: email: - filippo.valsorda@gmail.com - - phihag@phihag.de - yasoob.khld@gmail.com # irc: # channels: diff --git a/Makefile b/Makefile index d760e4576..6ee4ba4eb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete @@ -69,7 +69,7 @@ README.txt: README.md pandoc -f markdown -t plain README.md -o README.txt youtube-dl.1: README.md - $(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md + $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 rm -f youtube-dl.1.temp.md diff --git a/README.md b/README.md index 58e8be5d9..7e18112de 100644 --- a/README.md +++ b/README.md @@ -785,9 +785,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. -### The exe throws a *Runtime error from Visual C++* +### The exe throws an error due to missing `MSVCR100.dll` -To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). +To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555). ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 7c2f49f8b..f7979c43e 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -1,17 +1,42 @@ #!/usr/bin/python3 -from http.server import HTTPServer, BaseHTTPRequestHandler -from socketserver import ThreadingMixIn import argparse import ctypes import functools +import shutil +import subprocess import sys +import tempfile import threading import traceback import os.path +sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) +from youtube_dl.compat import ( + compat_http_server, + compat_str, + compat_urlparse, +) -class BuildHTTPServer(ThreadingMixIn, HTTPServer): +# These are not used outside of buildserver.py thus not in compat.py + +try: + import winreg as compat_winreg +except ImportError: # Python 2 + import _winreg as compat_winreg + +try: + import socketserver as compat_socketserver +except ImportError: # Python 2 + import SocketServer as compat_socketserver + +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + + +class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True @@ -191,7 +216,7 @@ def main(args=None): action='store_const', dest='action', const='service', help='Run as a Windows service') parser.add_argument('-b', '--bind', metavar='', - action='store', default='localhost:8142', + action='store', default='0.0.0.0:8142', help='Bind to host:port (default %default)') options = parser.parse_args(args=args) @@ -216,7 +241,7 @@ def main(args=None): srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) thr = threading.Thread(target=srv.serve_forever) thr.start() - input('Press ENTER to shut down') + compat_input('Press ENTER to shut down') srv.shutdown() thr.join() @@ -231,8 +256,6 @@ def rmtree(path): os.remove(fname) os.rmdir(path) -#============================================================================== - class BuildError(Exception): def __init__(self, output, code=500): @@ -249,15 +272,25 @@ class HTTPError(BuildError): class PythonBuilder(object): def __init__(self, **kwargs): - pythonVersion = kwargs.pop('python', '2.7') - try: - key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) + python_version = kwargs.pop('python', '3.4') + python_path = None + for node in ('Wow6432Node\\', ''): try: - self.pythonPath, _ = _winreg.QueryValueEx(key, '') - finally: - _winreg.CloseKey(key) - except Exception: - raise BuildError('No such Python version: %s' % pythonVersion) + key = compat_winreg.OpenKey( + compat_winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version)) + try: + python_path, _ = compat_winreg.QueryValueEx(key, '') + finally: + compat_winreg.CloseKey(key) + break + except Exception: + pass + + if not python_path: + raise BuildError('No such Python version: %s' % python_version) + + self.pythonPath = python_path super(PythonBuilder, self).__init__(**kwargs) @@ -305,8 +338,10 @@ class YoutubeDLBuilder(object): def build(self): try: - subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], - cwd=self.buildPath) + proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath) + proc.wait() + #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], + # cwd=self.buildPath) except subprocess.CalledProcessError as e: raise BuildError(e.output) @@ -369,12 +404,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea pass -class BuildHTTPRequestHandler(BaseHTTPRequestHandler): +class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler): actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching. def do_GET(self): - path = urlparse.urlparse(self.path) - paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) + path = compat_urlparse.urlparse(self.path) + paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()]) action, _, path = path.path.strip('/').partition('/') if path: path = path.split('/') @@ -388,7 +423,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): builder.close() except BuildError as e: self.send_response(e.code) - msg = unicode(e).encode('UTF-8') + msg = compat_str(e).encode('UTF-8') self.send_header('Content-Type', 'text/plain; charset=UTF-8') self.send_header('Content-Length', len(msg)) self.end_headers() @@ -400,7 +435,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): else: self.send_response(500, 'Malformed URL') -#============================================================================== - if __name__ == '__main__': main() diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 776e6556e..e3f6339b5 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,13 +1,46 @@ from __future__ import unicode_literals import io +import optparse import os.path -import sys import re ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') +PREFIX = '''%YOUTUBE-DL(1) + +# NAME + +youtube\-dl \- download videos from youtube.com or other video platforms + +# SYNOPSIS + +**youtube-dl** \[OPTIONS\] URL [URL...] + +''' + + +def main(): + parser = optparse.OptionParser(usage='%prog OUTFILE.md') + options, args = parser.parse_args() + if len(args) != 1: + parser.error('Expected an output filename') + + outfile, = args + + with io.open(README_FILE, encoding='utf-8') as f: + readme = f.read() + + readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) + readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) + readme = PREFIX + readme + + readme = filter_options(readme) + + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(readme) + def filter_options(readme): ret = '' @@ -37,27 +70,5 @@ def filter_options(readme): return ret -with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() - -PREFIX = '''%YOUTUBE-DL(1) - -# NAME - -youtube\-dl \- download videos from youtube.com or other video platforms - -# SYNOPSIS - -**youtube-dl** \[OPTIONS\] URL [URL...] - -''' -readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) -readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) -readme = PREFIX + readme - -readme = filter_options(readme) - -if sys.version_info < (3, 0): - print(readme.encode('utf-8')) -else: - print(readme) +if __name__ == '__main__': + main() diff --git a/devscripts/release.sh b/devscripts/release.sh index 7dd391b38..cde4d0a39 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -6,7 +6,7 @@ # * the git config user.signingkey is properly set # You will need -# pip install coverage nose rsa +# pip install coverage nose rsa wheel # TODO # release notes @@ -15,10 +15,28 @@ set -e skip_tests=true -if [ "$1" = '--run-tests' ]; then - skip_tests=false - shift -fi +buildserver='localhost:8142' + +while true +do +case "$1" in + --run-tests) + skip_tests=false + shift + ;; + --buildserver) + buildserver="$2" + shift 2 + ;; + --*) + echo "ERROR: unknown option $1" + exit 1 + ;; + *) + break + ;; +esac +done if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi version="$1" @@ -35,6 +53,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi +if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean @@ -66,7 +85,7 @@ git push origin "$version" REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz read -p "VM running? (y/n) " -n 1 -wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cd6bfa51c..bbc647030 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,8 +43,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -136,6 +136,7 @@ - **ComedyCentral** - **ComedyCentralShows**: The Daily Show / The Colbert Report - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **Coub** - **Cracked** - **Crackle** - **Criterion** @@ -205,6 +206,7 @@ - **exfm**: ex.fm - **ExpoTV** - **ExtremeTube** + - **EyedoTV** - **facebook** - **faz.net** - **fc2** @@ -326,8 +328,8 @@ - **LePlaylist** - **LetvCloud**: 乐视云 - **Libsyn** + - **life**: Life.ru - **life:embed** - - **lifenews**: LIFE | NEWS - **limelight** - **limelight:channel** - **limelight:channel_list** @@ -512,6 +514,8 @@ - **R7** - **radio.de** - **radiobremen** + - **radiocanada** + - **RadioCanadaAudioVideo** - **radiofrance** - **RadioJavan** - **Rai** @@ -521,6 +525,7 @@ - **RedTube** - **RegioTV** - **Restudy** + - **Reuters** - **ReverbNation** - **Revision3** - **RICE** @@ -682,8 +687,8 @@ - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** - - **tvp.pl** - - **tvp.pl:Series** + - **tvp**: Telewizja Polska + - **tvp:series** - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** @@ -766,7 +771,8 @@ - **VuClip** - **vulture.com** - **Walla** - - **WashingtonPost** + - **washingtonpost** + - **washingtonpost:article** - **wat.tv** - **WatchIndianPorn**: Watch Indian Porn - **WDR** diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8baff2041..71a54b4f4 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,34 +1,42 @@ # coding: utf-8 from __future__ import unicode_literals +import calendar +import datetime import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_etree_fromstring, + compat_str, + compat_parse_qs, + compat_xml_parse_error, +) from ..utils import ( - int_or_none, - unescapeHTML, ExtractorError, + int_or_none, + float_or_none, xpath_text, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P\d+)(?:/index_(?P\d+).html)?' + _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', - 'md5': '2c301e4dab317596e837c3e7633e7d86', + 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'info_dict': { 'id': '1554319', 'ext': 'flv', 'title': '【金坷垃】金泡沫', - 'duration': 308313, + 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', + 'duration': 308.067, + 'timestamp': 1398012660, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', - 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', - 'timestamp': 1397983878, 'uploader': '菊子桑', + 'uploader_id': '156160', }, }, { 'url': 'http://www.bilibili.com/video/av1041170/', @@ -36,75 +44,110 @@ class BiliBiliIE(InfoExtractor): 'id': '1041170', 'title': '【BD1080P】刀语【诸神&异域】', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'uploader': '枫叶逝去', - 'timestamp': 1396501299, }, 'playlist_count': 9, }] + # BiliBili blocks keys from time to time. The current key is extracted from + # the Android client + # TODO: find the sign algorithm used in the flash player + _APP_KEY = '86385cdc024c0f6c' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - page_num = mobj.group('page_num') or '1' - view_data = self._download_json( - 'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num), - video_id) - if 'error' in view_data: - raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True) + webpage = self._download_webpage(url, video_id) - cid = view_data['cid'] - title = unescapeHTML(view_data['title']) + params = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters')) + cid = params['cid'][0] - doc = self._download_xml( - 'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, - cid, - 'Downloading page %s/%s' % (page_num, view_data['pages']) - ) + info_xml_str = self._download_webpage( + 'http://interface.bilibili.com/v_cdn_play', + cid, query={'appkey': self._APP_KEY, 'cid': cid}, + note='Downloading video info page') - if xpath_text(doc, './result') == 'error': - raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True) + err_msg = None + durls = None + info_xml = None + try: + info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8')) + except compat_xml_parse_error: + info_json = self._parse_json(info_xml_str, video_id, fatal=False) + err_msg = (info_json or {}).get('error_text') + else: + err_msg = xpath_text(info_xml, './message') + + if info_xml is not None: + durls = info_xml.findall('./durl') + if not durls: + if err_msg: + raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True) + else: + raise ExtractorError('No videos found!') entries = [] - for durl in doc.findall('./durl'): + for durl in durls: size = xpath_text(durl, ['./filesize', './size']) formats = [{ 'url': durl.find('./url').text, 'filesize': int_or_none(size), - 'ext': 'flv', }] - backup_urls = durl.find('./backup_url') - if backup_urls is not None: - for backup_url in backup_urls.findall('./url'): - formats.append({'url': backup_url.text}) - formats.reverse() + for backup_url in durl.findall('./backup_url/url'): + formats.append({ + 'url': backup_url.text, + # backup URLs have lower priorities + 'preference': -2 if 'hd.mp4' in backup_url.text else -3, + }) + + self._sort_formats(formats) entries.append({ 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), - 'title': title, 'duration': int_or_none(xpath_text(durl, './length'), 1000), 'formats': formats, }) + title = self._html_search_regex(']+title="([^"]+)">', webpage, 'title') + description = self._html_search_meta('description', webpage) + datetime_str = self._html_search_regex( + r']+datetime="([^"]+)"', webpage, 'upload time', fatal=False) + if datetime_str: + timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) + + # TODO 'view_count' requires deobfuscating Javascript info = { 'id': compat_str(cid), 'title': title, - 'description': view_data.get('description'), - 'thumbnail': view_data.get('pic'), - 'uploader': view_data.get('author'), - 'timestamp': int_or_none(view_data.get('created')), - 'view_count': int_or_none(view_data.get('play')), - 'duration': int_or_none(xpath_text(doc, './timelength')), + 'description': description, + 'timestamp': timestamp, + 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000), } + uploader_mobj = re.search( + r']+href="https?://space\.bilibili\.com/(?P\d+)"[^>]+title="(?P[^"]+)"', + webpage) + if uploader_mobj: + info.update({ + 'uploader': uploader_mobj.group('name'), + 'uploader_id': uploader_mobj.group('id'), + }) + + for entry in entries: + entry.update(info) + if len(entries) == 1: - entries[0].update(info) return entries[0] else: - info.update({ + return { '_type': 'multi_video', 'id': video_id, + 'title': title, + 'description': description, 'entries': entries, - }) - return info + } diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py new file mode 100644 index 000000000..a901b8d22 --- /dev/null +++ b/youtube_dl/extractor/coub.py @@ -0,0 +1,143 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + parse_iso8601, + qualities, +) + + +class CoubIE(InfoExtractor): + _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P[\da-z]+)' + + _TESTS = [{ + 'url': 'http://coub.com/view/5u5n1', + 'info_dict': { + 'id': '5u5n1', + 'ext': 'mp4', + 'title': 'The Matrix Moonwalk', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 4.6, + 'timestamp': 1428527772, + 'upload_date': '20150408', + 'uploader': 'Артём Лоскутников', + 'uploader_id': 'artyom.loskutnikov', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + 'age_limit': 0, + }, + }, { + 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4', + 'only_matching': True, + }, { + 'url': 'coub:5u5n1', + 'only_matching': True, + }, { + # longer video id + 'url': 'http://coub.com/view/237d5l5h', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + coub = self._download_json( + 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id) + + if coub.get('error'): + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, coub['error']), expected=True) + + title = coub['title'] + + file_versions = coub['file_versions'] + + QUALITIES = ('low', 'med', 'high') + + MOBILE = 'mobile' + IPHONE = 'iphone' + HTML5 = 'html5' + + SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5) + + quality_key = qualities(QUALITIES) + preference_key = qualities(SOURCE_PREFERENCE) + + formats = [] + + for kind, items in file_versions.get(HTML5, {}).items(): + if kind not in ('video', 'audio'): + continue + if not isinstance(items, dict): + continue + for quality, item in items.items(): + if not isinstance(item, dict): + continue + item_url = item.get('url') + if not item_url: + continue + formats.append({ + 'url': item_url, + 'format_id': '%s-%s-%s' % (HTML5, kind, quality), + 'filesize': int_or_none(item.get('size')), + 'vcodec': 'none' if kind == 'audio' else None, + 'quality': quality_key(quality), + 'preference': preference_key(HTML5), + }) + + iphone_url = file_versions.get(IPHONE, {}).get('url') + if iphone_url: + formats.append({ + 'url': iphone_url, + 'format_id': IPHONE, + 'preference': preference_key(IPHONE), + }) + + mobile_url = file_versions.get(MOBILE, {}).get('audio_url') + if mobile_url: + formats.append({ + 'url': mobile_url, + 'format_id': '%s-audio' % MOBILE, + 'preference': preference_key(MOBILE), + }) + + self._sort_formats(formats) + + thumbnail = coub.get('picture') + duration = float_or_none(coub.get('duration')) + timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at')) + uploader = coub.get('channel', {}).get('title') + uploader_id = coub.get('channel', {}).get('permalink') + + view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count')) + like_count = int_or_none(coub.get('likes_count')) + repost_count = int_or_none(coub.get('recoubs_count')) + comment_count = int_or_none(coub.get('comments_count')) + + age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin')) + if age_restricted is not None: + age_limit = 18 if age_restricted is True else 0 + else: + age_limit = None + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'like_count': like_count, + 'repost_count': repost_count, + 'comment_count': comment_count, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ddf62139e..dd4b2b838 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -143,6 +143,7 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) +from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 13e0cd237..89b869559 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor): self._LOGIN_URL, None, 'Downloading login popup') def is_logged(webpage): - return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<']) + return any(re.search(p, webpage) for p in ( + r'href=["\'](?:https://www\.udemy\.com)?/user/logout/', + r'>Logout<')) # already logged in if is_logged(login_popup): diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 041d93629..79c819bc3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -217,7 +217,6 @@ class VKIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - info_url = url if video_id: info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id # Some videos (removed?) can only be downloaded with list id specified diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 0f78466e6..b37d0eab6 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -20,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor): error = response.get('error') if error: raise ExtractorError(error, expected=True) + if response.get('type') == 'captcha' or 'captcha' in response: + YandexMusicBaseIE._raise_captcha() + + @staticmethod + def _raise_captcha(): + raise ExtractorError( + 'YandexMusic has considered youtube-dl requests automated and ' + 'asks you to solve a CAPTCHA. You can either wait for some ' + 'time until unblocked and optionally use --sleep-interval ' + 'in future or alternatively you can go to https://music.yandex.ru/ ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies', + expected=True) def _download_webpage(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: - raise ExtractorError( - 'YandexMusic has considered youtube-dl requests automated and ' - 'asks you to solve a CAPTCHA. You can either wait for some ' - 'time until unblocked and optionally use --sleep-interval ' - 'in future or alternatively you can go to https://music.yandex.ru/ ' - 'solve CAPTCHA, then export cookies and pass cookie file to ' - 'youtube-dl with --cookies', - expected=True) + self._raise_captcha() return webpage def _download_json(self, *args, **kwargs): diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 349ce0941..dbccbe228 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -275,6 +275,8 @@ class YoukuIE(InfoExtractor): 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), 'filesize': int(seg['size']), + 'width': stream.get('width'), + 'height': stream.get('height'), }) return { diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 522a56669..ad6fb26c6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21.2' +__version__ = '2016.05.30.2'