diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6d3abb52f..013399fd6 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -1,7 +1,9 @@ # encoding: utf-8 from __future__ import unicode_literals +import random import re +import string import json import base64 import zlib @@ -36,10 +38,18 @@ from ..aes import ( class CrunchyrollBaseIE(InfoExtractor): _NETRC_MACHINE = 'crunchyroll' + def __init__(self, *args, **kwargs): + super(CrunchyrollBaseIE, self).__init__(*args, **kwargs) + self.api_session_id = None + self.api_session_auth = None + self.api_device_id = ''.join(random.sample(string.ascii_letters + string.digits, 32)) + def _login(self): (username, password) = self._get_login_info() if username is None: return + + # Log into main website self.report_login() login_url = 'https://www.crunchyroll.com/?a=formhandler' data = urlencode_postdata({ @@ -51,6 +61,41 @@ class CrunchyrollBaseIE(InfoExtractor): login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(login_request, None, False, 'Wrong login info') + # Start session with mobile API + res = self._api_call( + 'start_session', + { + 'device_id': self.api_device_id, + 'device_type': 'com.crunchyroll.iphone', + 'access_token': 'QWjz212GspMHH9h' + }, + video_id=None, + note='Starting session with mobile API', + errnote='Could not start session with mobile API', + # If mobile API fails, we can always fall back on the regular website + fatal=False + ) + + if res is not None: + self.api_session_id = res.get('data', {'session_id': None}).get('session_id') + + # Log into mobile API + res = self._api_call( + 'login', + { + 'account': username, + 'password': password + }, + video_id=None, + note='Logging in to mobile API', + errnote='Could not log into mobile API', + # If mobile API fails, we can always fall back on the regular website + fatal=False + ) + + if res is not None: + 
self.api_session_auth = res.get('data', {'auth': None}).get('auth') + def _real_initialize(self): self._login() @@ -67,6 +112,42 @@ class CrunchyrollBaseIE(InfoExtractor): request.add_header('Accept-Language', '*') return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) + def _api_call(self, entrypoint, params, video_id, *args, **kwargs): + '''Makes a call against the api. + + :param entrypoint: API method to call. + :param params: parameters to include in the request data. + :param video_id: video id passed on to _download_json (may be None). + + Source adapted from: + https://github.com/chrippa/livestreamer/blob/develop/src/livestreamer/plugins/crunchyroll.py + Copyright (c) 2011-2015, Christopher Rosell + License: https://github.com/chrippa/livestreamer/blob/develop/LICENSE + ''' + + url = 'https://api.crunchyroll.com/{0}.0.json'.format(entrypoint) + + # Default params + params = dict(params) + params.update({ + 'version': '2313.8', + 'locale': 'enUS', + }) + + if self.api_session_id: + params["session_id"] = self.api_session_id + + # Headers + headers = { + 'Host': 'api.crunchyroll.com', + 'Accept-Encoding': 'gzip, deflate', + 'Accept': '*/*', + 'Content-Type': 'application/x-www-form-urlencoded' + } + headers['User-Agent'] = 'Mozilla/5.0 (iPhone; iPhone OS 8.3.0; en_US)' + + return self._download_json(url, video_id, query=params, headers=headers, *args, **kwargs) + @staticmethod def _add_skip_wall(url): parsed_url = compat_urlparse.urlparse(url) @@ -87,7 +168,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { 'id': '645513', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', 'description': 'md5:2d17137920c64f2f49981a7797d275ef', 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', @@ -96,7 +177,7 @@ class
CrunchyrollIE(CrunchyrollBaseIE): 'url': 're:(?!.*&)', }, 'params': { - # rtmp + # m3u8 download 'skip_download': True, }, }, { @@ -145,6 +226,22 @@ class CrunchyrollIE(CrunchyrollBaseIE): '1080': ('80', '108'), } + # Crunchyroll does not give us bitrate data for the RTMP sources, + # so by default self._sort_formats() will put the HLS streams first. + # However, the HLS streams are actually a lower bitrate than their + # RTMP counterparts. + _FORMAT_QUALITY = { + 'ultralow': 2, + 'low': 3, + '360p': 4, + 'mid': 5, + '480p': 6, + 'high': 7, + '720p': 8, + 'ultra': 9, + '1080p': 10 + } + def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8'))) @@ -281,6 +378,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text mobj = re.match(self._VALID_URL, url) video_id = mobj.group('video_id') + # Fetch mobile webpage if mobj.group('prefix') == 'm': mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') webpage_url = self._search_regex(r'', mobile_webpage, 'webpage_url') @@ -303,6 +401,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if 'To view this, please log in to verify you are 18 or older.' in webpage: self.raise_login_required() + # Extract title, description, and other metadata + # from the mobile page video_title = self._html_search_regex( r'(?s)