From 73453430c11002a7193eaa9fb8cf5349fa326c93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2020 00:59:48 +0700 Subject: [PATCH 1/3] [hellporno] Fix extraction (closes #24399) --- youtube_dl/extractor/hellporno.py | 73 ++++++++++++++++--------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py index 0ee8ea712..fae425103 100644 --- a/youtube_dl/extractor/hellporno.py +++ b/youtube_dl/extractor/hellporno.py @@ -1,12 +1,11 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - js_to_json, + int_or_none, + merge_dicts, remove_end, - determine_ext, + unified_timestamp, ) @@ -14,15 +13,21 @@ class HellPornoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', - 'md5': '1fee339c610d2049699ef2aa699439f1', + 'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3', 'info_dict': { 'id': '149116', 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic', 'ext': 'mp4', 'title': 'Dixie is posing with naked ass very erotic', + 'description': 'md5:9a72922749354edb1c4b6e540ad3d215', + 'categories': list, 'thumbnail': r're:https?://.*\.jpg$', + 'duration': 240, + 'timestamp': 1398762720, + 'upload_date': '20140429', + 'view_count': int, 'age_limit': 18, - } + }, }, { 'url': 'http://hellporno.net/v/186271/', 'only_matching': True, @@ -36,40 +41,36 @@ class HellPornoIE(InfoExtractor): title = remove_end(self._html_search_regex( r'([^<]+)', webpage, 'title'), ' - Hell Porno') - flashvars = self._parse_json(self._search_regex( - r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), - display_id, transform_source=js_to_json) + info = self._parse_html5_media_entries(url, webpage, display_id)[0] + self._sort_formats(info['formats']) - video_id = flashvars.get('video_id') - thumbnail = 
flashvars.get('preview_url') - ext = determine_ext(flashvars.get('postfix'), 'mp4') + video_id = self._search_regex( + (r'chs_object\s*=\s*["\'](\d+)', + r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id', + default=display_id) + description = self._search_regex( + r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage, + 'description', fatal=False) + categories = [ + c.strip() + for c in self._html_search_meta( + 'keywords', webpage, 'categories', default='').split(',') + if c.strip()] + duration = int_or_none(self._og_search_property( + 'video:duration', webpage, fatal=False)) + timestamp = unified_timestamp(self._og_search_property( + 'video:release_date', webpage, fatal=False)) + view_count = int_or_none(self._search_regex( + r'>Views\s+(\d+)', webpage, 'view count', fatal=False)) - formats = [] - for video_url_key in ['video_url', 'video_alt_url']: - video_url = flashvars.get(video_url_key) - if not video_url: - continue - video_text = flashvars.get('%s_text' % video_url_key) - fmt = { - 'url': video_url, - 'ext': ext, - 'format_id': video_text, - } - m = re.search(r'^(?P<height>\d+)[pP]', video_text) - if m: - fmt['height'] = int(m.group('height')) - formats.append(fmt) - self._sort_formats(formats) - - categories = self._html_search_meta( - 'keywords', webpage, 'categories', default='').split(',') - - return { + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, 'title': title, - 'thumbnail': thumbnail, + 'description': description, 'categories': categories, + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, 'age_limit': 18, - 'formats': formats, - } + }) From 787c3604671283bd4945eefb87866d01fb973097 Mon Sep 17 00:00:00 2001 From: Devon Meunier Date: Sun, 19 May 2019 07:32:46 -0400 Subject: [PATCH 2/3] [cbc:watch] Add support for authentication --- youtube_dl/extractor/cbc.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbc.py 
b/youtube_dl/extractor/cbc.py index 751a3a8f2..b02cddbfd 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import json import re +from xml.sax.saxutils import escape from .common import InfoExtractor from ..compat import ( @@ -216,6 +217,29 @@ class CBCWatchBaseIE(InfoExtractor): 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', } _GEO_COUNTRIES = ['CA'] + _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login' + _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token' + _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' + _NETRC_MACHINE = 'cbcwatch' + + def _signature(self, email, password): + data = json.dumps({ + 'email': email, + 'password': password, + }).encode() + headers = {'content-type': 'application/json'} + query = {'apikey': self._API_KEY} + resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query) + access_token = resp['access_token'] + + # token + query = { + 'access_token': access_token, + 'apikey': self._API_KEY, + 'jwtapp': 'jwt', + } + resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query) + return resp['signature'] def _call_api(self, path, video_id): url = path if path.startswith('http') else self._API_BASE_URL + path @@ -249,13 +273,21 @@ class CBCWatchBaseIE(InfoExtractor): return self._device_id and self._device_token def _register_device(self): - self._device_id = self._device_token = None result = self._download_xml( self._API_BASE_URL + 'device/register', None, 'Acquiring device token', data=b'web') self._device_id = xpath_text(result, 'deviceId', fatal=True) - self._device_token = xpath_text(result, 'deviceToken', fatal=True) + anon_device_token = xpath_text(result, 'deviceToken', fatal=True) + email, password = self._get_login_info() + if email and password: + signature = self._signature(email, password) + data = '{0}{1}web'.format(escape(signature), 
escape(self._device_id)).encode() + url = self._API_BASE_URL + 'device/login' + result = self._download_xml(url, None, data=data, headers={'content-type': 'application/xml'}) + self._device_token = xpath_text(result, 'token', fatal=True) + else: + self._device_token = anon_device_token self._downloader.cache.store( 'cbcwatch', 'device', { 'id': self._device_id, From c76cdf2382c91af13de0c7580b1b5e1b24484664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2020 01:41:54 +0700 Subject: [PATCH 3/3] [cbc:watch] Fix authenticated device token caching (closes #19160) --- youtube_dl/extractor/cbc.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index b02cddbfd..fd5ec6033 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib import json import re from xml.sax.saxutils import escape @@ -263,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor): def _real_initialize(self): if self._valid_device_token(): return - device = self._downloader.cache.load('cbcwatch', 'device') or {} + device = self._downloader.cache.load( + 'cbcwatch', self._cache_device_key()) or {} self._device_id, self._device_token = device.get('id'), device.get('token') if self._valid_device_token(): return @@ -272,24 +274,30 @@ class CBCWatchBaseIE(InfoExtractor): def _valid_device_token(self): return self._device_id and self._device_token + def _cache_device_key(self): + email, _ = self._get_login_info() + return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device' + def _register_device(self): result = self._download_xml( self._API_BASE_URL + 'device/register', None, 'Acquiring device token', data=b'web') self._device_id = xpath_text(result, 'deviceId', fatal=True) - anon_device_token = xpath_text(result, 'deviceToken', fatal=True) email, password = 
self._get_login_info() if email and password: signature = self._signature(email, password) - data = '{0}{1}web'.format(escape(signature), escape(self._device_id)).encode() + data = '{0}{1}web'.format( + escape(signature), escape(self._device_id)).encode() url = self._API_BASE_URL + 'device/login' - result = self._download_xml(url, None, data=data, headers={'content-type': 'application/xml'}) + result = self._download_xml( + url, None, data=data, + headers={'content-type': 'application/xml'}) self._device_token = xpath_text(result, 'token', fatal=True) else: - self._device_token = anon_device_token + self._device_token = xpath_text(result, 'deviceToken', fatal=True) self._downloader.cache.store( - 'cbcwatch', 'device', { + 'cbcwatch', self._cache_device_key(), { 'id': self._device_id, 'token': self._device_token, })