diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index 3be0c646b..54b4b9be9 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -5,7 +5,10 @@ import json
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
from ..utils import (
js_to_json,
smuggle_url,
@@ -206,30 +209,52 @@ class CBCWatchBaseIE(InfoExtractor):
def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path
- result = self._download_xml(url, video_id, headers={
- 'X-Clearleap-DeviceId': self._device_id,
- 'X-Clearleap-DeviceToken': self._device_token,
- })
+ # At most two attempts: one retry is allowed after re-registering the device
+ for attempt in range(2):
+ try:
+ result = self._download_xml(url, video_id, headers={
+ 'X-Clearleap-DeviceId': self._device_id,
+ 'X-Clearleap-DeviceToken': self._device_token,
+ })
+ # Success (last statement of the try body): do not send the request twice
+ break
+ except ExtractorError as e:
+ # Only the first HTTP 401 triggers a retry; a second one is re-raised so result is never unbound
+ if attempt == 0 and isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ # Device token has expired, re-acquiring device token
+ self._register_device()
+ continue
+ raise
error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
if error_message:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
return result
def _real_initialize(self):
- if not self._device_id or not self._device_token:
- device = self._downloader.cache.load('cbcwatch', 'device') or {}
- self._device_id, self._device_token = device.get('id'), device.get('token')
- if not self._device_id or not self._device_token:
- result = self._download_xml(
- self._API_BASE_URL + 'device/register',
- None, data=b'web')
- self._device_id = xpath_text(result, 'deviceId', fatal=True)
- self._device_token = xpath_text(result, 'deviceToken', fatal=True)
- self._downloader.cache.store(
- 'cbcwatch', 'device', {
- 'id': self._device_id,
- 'token': self._device_token,
- })
+ if self._valid_device_token():
+ return
+ device = self._downloader.cache.load('cbcwatch', 'device') or {}
+ self._device_id, self._device_token = device.get('id'), device.get('token')
+ if self._valid_device_token():
+ return
+ self._register_device()
+
+ def _valid_device_token(self):
+ return self._device_id and self._device_token
+
+ def _register_device(self):
+ self._device_id = self._device_token = None
+ result = self._download_xml(
+ self._API_BASE_URL + 'device/register',
+ None, 'Acquiring device token',
+ data=b'web')
+ self._device_id = xpath_text(result, 'deviceId', fatal=True)
+ self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+ self._downloader.cache.store(
+ 'cbcwatch', 'device', {
+ 'id': self._device_id,
+ 'token': self._device_token,
+ })
def _parse_rss_feed(self, rss):
channel = xpath_element(rss, 'channel', fatal=True)
diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py
index 37549fb01..00e67426b 100644
--- a/youtube_dl/extractor/fxnetworks.py
+++ b/youtube_dl/extractor/fxnetworks.py
@@ -41,7 +41,7 @@ class FXNetworksIE(AdobePassIE):
if 'The content you are trying to access is not available in your region.' in webpage:
self.raise_geo_restricted()
video_data = extract_attributes(self._search_regex(
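- r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
+ r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))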
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
release_url = video_data['rel']
title = video_data['data-title']
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 9ce513aeb..23e24d216 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
@@ -264,7 +264,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
@@ -272,11 +272,14 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
'title': 'Nataly Hot',
},
'playlist_mincount': 2,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'only_matching': True,
}]
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
+ _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
_TESTS = [{
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'info_dict': {
@@ -305,6 +308,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
# Most Viewed Videos
'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'only_matching': True,
}]
def _real_extract(self, url):