Merge remote-tracking branch 'source/master' into linkedin-learning-subtitles

This commit is contained in:
Erez Volk 2020-01-06 10:30:35 +02:00
commit 058467a877
20 changed files with 419 additions and 225 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.12.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've verified that I'm running youtube-dl version **2020.01.01**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.12.25 [debug] youtube-dl version 2020.01.01
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.12.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've verified that I'm running youtube-dl version **2020.01.01**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.12.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've verified that I'm running youtube-dl version **2020.01.01**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.12.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've verified that I'm running youtube-dl version **2020.01.01**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.12.25 [debug] youtube-dl version 2020.01.01
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.12.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.12.25** - [ ] I've verified that I'm running youtube-dl version **2020.01.01**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,3 +1,24 @@
version 2020.01.01
Extractors
* [brightcove] Invalidate policy key cache on failing requests
* [pornhub] Improve locked videos detection (#22449, #22780)
+ [pornhub] Add support for m3u8 formats
* [pornhub] Fix extraction (#22749, #23082)
* [brightcove] Update policy key on failing requests
* [spankbang] Improve removed video detection (#23423)
* [spankbang] Fix extraction (#23307, #23423, #23444)
* [soundcloud] Automatically update client id on failing requests
* [prosiebensat1] Improve geo restriction handling (#23571)
* [brightcove] Cache brightcove player policy keys
* [teachable] Fail with error message if no video URL found
* [teachable] Improve locked lessons detection (#23528)
+ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
* [mitele] Fix extraction (#21354, #23456)
* [soundcloud] Update client id (#23516)
* [mailru] Relax URL regular expressions (#23509)
version 2019.12.25 version 2019.12.25
Core Core

View File

@ -1,7 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import io import io
import json import json
import mimetypes import mimetypes
@ -15,7 +14,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import ( from youtube_dl.compat import (
compat_basestring, compat_basestring,
compat_input,
compat_getpass, compat_getpass,
compat_print, compat_print,
compat_urllib_request, compat_urllib_request,
@ -40,28 +38,20 @@ class GitHubReleaser(object):
try: try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE) info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None: if info is not None:
self._username = info[0] self._token = info[2]
self._password = info[2]
compat_print('Using GitHub credentials found in .netrc...') compat_print('Using GitHub credentials found in .netrc...')
return return
else: else:
compat_print('No GitHub credentials found in .netrc') compat_print('No GitHub credentials found in .netrc')
except (IOError, netrc.NetrcParseError): except (IOError, netrc.NetrcParseError):
compat_print('Unable to parse .netrc') compat_print('Unable to parse .netrc')
self._username = compat_input( self._token = compat_getpass(
'Type your GitHub username or email address and press [Return]: ') 'Type your GitHub PAT (personal access token) and press [Return]: ')
self._password = compat_getpass(
'Type your GitHub password and press [Return]: ')
def _call(self, req): def _call(self, req):
if isinstance(req, compat_basestring): if isinstance(req, compat_basestring):
req = sanitized_Request(req) req = sanitized_Request(req)
# Authorizing manually since GitHub does not response with 401 with req.add_header('Authorization', 'token %s' % self._token)
# WWW-Authenticate header set (see
# https://developer.github.com/v3/#basic-authentication)
b64 = base64.b64encode(
('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
req.add_header('Authorization', 'Basic %s' % b64)
response = self._opener.open(req).read().decode('utf-8') response = self._opener.open(req).read().decode('utf-8')
return json.loads(response) return json.loads(response)

View File

@ -761,6 +761,7 @@
- **screen.yahoo:search**: Yahoo screen search - **screen.yahoo:search**: Yahoo screen search
- **Screencast** - **Screencast**
- **ScreencastOMatic** - **ScreencastOMatic**
- **ScrippsNetworks**
- **scrippsnetworks:watch** - **scrippsnetworks:watch**
- **SCTE** - **SCTE**
- **SCTECourse** - **SCTECourse**

View File

@ -586,45 +586,63 @@ class BrightcoveNewIE(AdobePassIE):
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage( policy_key_id = '%s_%s' % (account_id, player_id)
'http://players.brightcove.net/%s/%s_%s/index.min.js' policy_key = self._downloader.cache.load('brightcove', policy_key_id)
% (account_id, player_id, embed), video_id) policy_key_extracted = False
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
policy_key = None def extract_policy_key():
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
catalog = self._search_regex( policy_key = None
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog: catalog = self._search_regex(
catalog = self._parse_json( r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
js_to_json(catalog), video_id, fatal=False)
if catalog: if catalog:
policy_key = catalog.get('policyKey') catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key: if not policy_key:
policy_key = self._search_regex( policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk') webpage, 'policy key', group='pk')
store_pk(policy_key)
return policy_key
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
headers = { headers = {}
'Accept': 'application/json;pk=%s' % policy_key,
}
referrer = smuggled_data.get('referrer') referrer = smuggled_data.get('referrer')
if referrer: if referrer:
headers.update({ headers.update({
'Referer': referrer, 'Referer': referrer,
'Origin': re.search(r'https?://[^/]+', referrer).group(0), 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
}) })
try:
json_data = self._download_json(api_url, video_id, headers=headers) for _ in range(2):
except ExtractorError as e: if not policy_key:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: policy_key = extract_policy_key()
json_data = self._parse_json(e.cause.read().decode(), video_id)[0] policy_key_extracted = True
message = json_data.get('message') or json_data['error_code'] headers['Accept'] = 'application/json;pk=%s' % policy_key
if json_data.get('error_subcode') == 'CLIENT_GEO': try:
self.raise_geo_restricted(msg=message) json_data = self._download_json(api_url, video_id, headers=headers)
raise ExtractorError(message, expected=True) break
raise except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
policy_key = None
store_pk(None)
continue
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors') errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH': if errors and errors[0].get('error_subcode') == 'TVE_AUTH':

View File

@ -963,7 +963,10 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import ScrippsNetworksWatchIE from .scrippsnetworks import (
ScrippsNetworksWatchIE,
ScrippsNetworksIE,
)
from .scte import ( from .scte import (
SCTEIE, SCTEIE,
SCTECourseIE, SCTECourseIE,

View File

@ -12,6 +12,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
JSON_LD_RE, JSON_LD_RE,
js_to_json,
NO_DEFAULT, NO_DEFAULT,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
@ -105,6 +106,7 @@ class NRKBaseIE(InfoExtractor):
MESSAGES = { MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut', 'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
} }
message_type = data.get('messageType', '') message_type = data.get('messageType', '')
@ -255,6 +257,17 @@ class NRKTVIE(NRKBaseIE):
''' % _EPISODE_RE ''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117',
'md5': '8270824df46ec629b66aeaa5796b36fb',
'info_dict': {
'id': 'MDDP12000117AA',
'ext': 'mp4',
'title': 'Alarm Trolltunga',
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
'duration': 2223,
'age_limit': 6,
},
}, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '9a167e54d04671eb6317a37b7bc8a280', 'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': { 'info_dict': {
@ -266,6 +279,7 @@ class NRKTVIE(NRKBaseIE):
'series': '20 spørsmål', 'series': '20 spørsmål',
'episode': '23.05.2014', 'episode': '23.05.2014',
}, },
'skip': 'NoProgramRights',
}, { }, {
'url': 'https://tv.nrk.no/program/mdfp15000514', 'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': { 'info_dict': {
@ -370,7 +384,24 @@ class NRKTVIE(NRKBaseIE):
class NRKTVEpisodeIE(InfoExtractor): class NRKTVEpisodeIE(InfoExtractor):
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)' _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
_TEST = { _TESTS = [{
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
'info_dict': {
'id': 'MUHH36005220BA',
'ext': 'mp4',
'title': 'Kro, krig og kjærlighet 2:6',
'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
'duration': 1563,
'series': 'Hellums kro',
'season_number': 1,
'episode_number': 2,
'episode': '2:6',
'age_limit': 6,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8', 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
'info_dict': { 'info_dict': {
'id': 'MSUI14000816AA', 'id': 'MSUI14000816AA',
@ -386,7 +417,8 @@ class NRKTVEpisodeIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} 'skip': 'ProgramRightsHasExpired',
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -409,7 +441,7 @@ class NRKTVSerieBaseIE(InfoExtractor):
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'), r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT), webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False) display_id, fatal=False, transform_source=js_to_json)
if not config: if not config:
return return
return try_get( return try_get(
@ -479,6 +511,14 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/serie/blank',
'info_dict': {
'id': 'blank',
'title': 'Blank',
'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
},
'playlist_mincount': 30,
}, {
# new layout, seasons # new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage', 'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': { 'info_dict': {
@ -648,7 +688,7 @@ class NRKSkoleIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099', 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6', 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
'info_dict': { 'info_dict': {
'id': '6021', 'id': '6021',
'ext': 'mp4', 'ext': 'mp4',

View File

@ -17,6 +17,7 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
NO_DEFAULT,
orderedSet, orderedSet,
remove_quotes, remove_quotes,
str_to_int, str_to_int,
@ -227,12 +228,13 @@ class PornHubIE(PornHubBaseIE):
else: else:
thumbnail, duration = [None] * 2 thumbnail, duration = [None] * 2
if not video_urls: def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
tv_webpage = dl_webpage('tv')
assignments = self._search_regex( assignments = self._search_regex(
r'(var.+?mediastring.+?)</script>', tv_webpage, pattern, webpage, 'encoded url', default=default)
'encoded url').split(';') if not assignments:
return {}
assignments = assignments.split(';')
js_vars = {} js_vars = {}
@ -254,11 +256,35 @@ class PornHubIE(PornHubBaseIE):
assn = re.sub(r'var\s+', '', assn) assn = re.sub(r'var\s+', '', assn)
vname, value = assn.split('=', 1) vname, value = assn.split('=', 1)
js_vars[vname] = parse_js_value(value) js_vars[vname] = parse_js_value(value)
return js_vars
video_url = js_vars['mediastring'] def add_video_url(video_url):
if video_url not in video_urls_set: v_url = url_or_none(video_url)
video_urls.append((video_url, None)) if not v_url:
video_urls_set.add(video_url) return
if v_url in video_urls_set:
return
video_urls.append((v_url, None))
video_urls_set.add(v_url)
if not video_urls:
FORMAT_PREFIXES = ('media', 'quality')
js_vars = extract_js_vars(
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
default=None)
if js_vars:
for key, format_url in js_vars.items():
if any(key.startswith(p) for p in FORMAT_PREFIXES):
add_video_url(format_url)
if not video_urls and re.search(
r'<[^>]+\bid=["\']lockedPlayer', webpage):
raise ExtractorError(
'Video %s is locked' % video_id, expected=True)
if not video_urls:
js_vars = extract_js_vars(
dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
add_video_url(js_vars['mediastring'])
for mobj in re.finditer( for mobj in re.finditer(
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
@ -276,10 +302,16 @@ class PornHubIE(PornHubBaseIE):
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date: if upload_date:
upload_date = upload_date.replace('/', '') upload_date = upload_date.replace('/', '')
if determine_ext(video_url) == 'mpd': ext = determine_ext(video_url)
if ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))
continue continue
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
tbr = None tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj: if mobj:

View File

@ -16,7 +16,7 @@ from ..utils import (
class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1BaseIE(InfoExtractor):
_GEO_COUNTRIES = ['DE'] _GEO_BYPASS = False
_ACCESS_ID = None _ACCESS_ID = None
_SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
_V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
@ -39,14 +39,18 @@ class ProSiebenSat1BaseIE(InfoExtractor):
formats = [] formats = []
if self._ACCESS_ID: if self._ACCESS_ID:
raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
server_token = (self._download_json( protocols = self._download_json(
self._V4_BASE_URL + 'protocols', clip_id, self._V4_BASE_URL + 'protocols', clip_id,
'Downloading protocols JSON', 'Downloading protocols JSON',
headers=self.geo_verification_headers(), query={ headers=self.geo_verification_headers(), query={
'access_id': self._ACCESS_ID, 'access_id': self._ACCESS_ID,
'client_token': sha1((raw_ct).encode()).hexdigest(), 'client_token': sha1((raw_ct).encode()).hexdigest(),
'video_id': clip_id, 'video_id': clip_id,
}, fatal=False) or {}).get('server_token') }, fatal=False, expected_status=(403,)) or {}
error = protocols.get('error') or {}
if error.get('title') == 'Geo check failed':
self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
server_token = protocols.get('server_token')
if server_token: if server_token:
urls = (self._download_json( urls = (self._download_json(
self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={

View File

@ -43,8 +43,15 @@ class RedTubeIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.redtube.com/%s' % video_id, video_id) 'http://www.redtube.com/%s' % video_id, video_id)
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): ERRORS = (
raise ExtractorError('Video %s has been removed' % video_id, expected=True) (('video-deleted-info', '>This video has been removed'), 'has been removed'),
(('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
)
for patterns, message in ERRORS:
if any(p in webpage for p in patterns):
raise ExtractorError(
'Video %s %s' % (video_id, message), expected=True)
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})

View File

@ -9,10 +9,13 @@ from .common import (
SearchInfoExtractor SearchInfoExtractor
) )
from ..compat import ( from ..compat import (
compat_HTTPError,
compat_kwargs,
compat_str, compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
HEADRequest, HEADRequest,
@ -255,7 +258,6 @@ class SoundcloudIE(InfoExtractor):
_API_BASE = 'https://api.soundcloud.com/' _API_BASE = 'https://api.soundcloud.com/'
_API_V2_BASE = 'https://api-v2.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/' _BASE_URL = 'https://soundcloud.com/'
_CLIENT_ID = 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
_ARTWORK_MAP = { _ARTWORK_MAP = {
@ -271,9 +273,49 @@ class SoundcloudIE(InfoExtractor):
'original': 0, 'original': 0,
} }
def _store_client_id(self, client_id):
    """Save *client_id* in the downloader cache (section 'soundcloud', key 'client_id')."""
    cache = self._downloader.cache
    cache.store('soundcloud', 'client_id', client_id)
def _update_client_id(self):
webpage = self._download_webpage('https://soundcloud.com/', None)
for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
script = self._download_webpage(src, None, fatal=False)
if script:
client_id = self._search_regex(
r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
script, 'client id', default=None)
if client_id:
self._CLIENT_ID = client_id
self._store_client_id(client_id)
return
raise ExtractorError('Unable to extract client id')
def _download_json(self, *args, **kwargs):
    """Download JSON with the current client id, refreshing the id once on HTTP 401.

    Accepts the same arguments as the parent ``_download_json``. The
    ``client_id`` query parameter is always injected from ``self._CLIENT_ID``.

    On the first HTTP 401 the cached client id is invalidated, a new one is
    extracted and the request is retried. Any other failure, or a 401 on
    the retry, is reported to the caller: with ``fatal=False`` a warning is
    printed and ``False`` is returned, otherwise the ExtractorError is
    re-raised.
    """
    non_fatal = kwargs.get('fatal') is False
    if non_fatal:
        # Force the parent call to raise so that a 401 can be detected here;
        # the non-fatal contract is re-applied in the except branch below.
        del kwargs['fatal']
    # Copy so that the caller's dict is never mutated; tolerate query=None.
    query = (kwargs.get('query') or {}).copy()
    max_attempts = 2
    for attempt in range(max_attempts):
        query['client_id'] = self._CLIENT_ID
        kwargs['query'] = query
        try:
            return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
        except ExtractorError as e:
            unauthorized = isinstance(e.cause, compat_HTTPError) and e.cause.code == 401
            if unauthorized and attempt < max_attempts - 1:
                self._store_client_id(None)
                self._update_client_id()
                continue
            # Out of retries (or not an auth problem): surface the error
            # instead of falling off the loop and silently returning None.
            if non_fatal:
                self._downloader.report_warning(error_to_compat_str(e))
                return False
            raise
def _real_initialize(self):
    """Set ``self._CLIENT_ID`` from the cache, else the built-in default."""
    cached_id = self._downloader.cache.load('soundcloud', 'client_id')
    # A falsy cache result falls back to the hard-coded id.
    self._CLIENT_ID = cached_id or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
@ -451,9 +493,7 @@ class SoundcloudIE(InfoExtractor):
track_id = mobj.group('track_id') track_id = mobj.group('track_id')
query = { query = {}
'client_id': self._CLIENT_ID,
}
if track_id: if track_id:
info_json_url = self._API_V2_BASE + 'tracks/' + track_id info_json_url = self._API_V2_BASE + 'tracks/' + track_id
full_title = track_id full_title = track_id
@ -536,7 +576,6 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = { COMMON_QUERY = {
'limit': 2000000000, 'limit': 2000000000,
'client_id': self._CLIENT_ID,
'linked_partitioning': '1', 'linked_partitioning': '1',
} }
@ -722,9 +761,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
query = { query = {}
'client_id': self._CLIENT_ID,
}
token = mobj.group('token') token = mobj.group('token')
if token: if token:
query['secret_token'] = token query['secret_token'] = token
@ -761,7 +798,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
self._MAX_RESULTS_PER_PAGE) self._MAX_RESULTS_PER_PAGE)
query.update({ query.update({
'limit': limit, 'limit': limit,
'client_id': self._CLIENT_ID,
'linked_partitioning': 1, 'linked_partitioning': 1,
'offset': 0, 'offset': 0,
}) })

View File

@ -4,6 +4,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
merge_dicts, merge_dicts,
orderedSet, orderedSet,
@ -64,7 +65,7 @@ class SpankBangIE(InfoExtractor):
url.replace('/%s/embed' % video_id, '/%s/video' % video_id), url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
video_id, headers={'Cookie': 'country=US'}) video_id, headers={'Cookie': 'country=US'})
if re.search(r'<[^>]+\bid=["\']video_removed', webpage): if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(
'Video %s is not available' % video_id, expected=True) 'Video %s is not available' % video_id, expected=True)
@ -75,11 +76,20 @@ class SpankBangIE(InfoExtractor):
if not f_url: if not f_url:
return return
f = parse_resolution(format_id) f = parse_resolution(format_id)
f.update({ ext = determine_ext(f_url)
'url': f_url, if format_id.startswith('m3u8') or ext == 'm3u8':
'format_id': format_id, formats.extend(self._extract_m3u8_formats(
}) f_url, video_id, 'mp4', entry_protocol='m3u8_native',
formats.append(f) m3u8_id='hls', fatal=False))
elif format_id.startswith('mpd') or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
f_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'mp4' or f.get('width') or f.get('height'):
f.update({
'url': f_url,
'format_id': format_id,
})
formats.append(f)
STREAM_URL_PREFIX = 'stream_url_' STREAM_URL_PREFIX = 'stream_url_'
@ -93,28 +103,22 @@ class SpankBangIE(InfoExtractor):
r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, 'stream key', group='value') webpage, 'stream key', group='value')
sb_csrf_session = self._get_cookies(
'https://spankbang.com')['sb_csrf_session'].value
stream = self._download_json( stream = self._download_json(
'https://spankbang.com/api/videos/stream', video_id, 'https://spankbang.com/api/videos/stream', video_id,
'Downloading stream JSON', data=urlencode_postdata({ 'Downloading stream JSON', data=urlencode_postdata({
'id': stream_key, 'id': stream_key,
'data': 0, 'data': 0,
'sb_csrf_session': sb_csrf_session,
}), headers={ }), headers={
'Referer': url, 'Referer': url,
'X-CSRFToken': sb_csrf_session, 'X-Requested-With': 'XMLHttpRequest',
}) })
for format_id, format_url in stream.items(): for format_id, format_url in stream.items():
if format_id.startswith(STREAM_URL_PREFIX): if format_url and isinstance(format_url, list):
if format_url and isinstance(format_url, list): format_url = format_url[0]
format_url = format_url[0] extract_format(format_id, format_url)
extract_format(
format_id[len(STREAM_URL_PREFIX):], format_url)
self._sort_formats(formats) self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})

View File

@ -165,8 +165,12 @@ class TeachableIE(TeachableBaseIE):
if any(re.search(p, webpage) for p in ( if any(re.search(p, webpage) for p in (
r'class=["\']lecture-contents-locked', r'class=["\']lecture-contents-locked',
r'>\s*Lecture contents locked', r'>\s*Lecture contents locked',
r'id=["\']lecture-locked')): r'id=["\']lecture-locked',
# https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
r'class=["\'](?:inner-)?lesson-locked',
r'>LESSON LOCKED<')):
self.raise_login_required('Lecture contents locked') self.raise_login_required('Lecture contents locked')
raise ExtractorError('Unable to find video URL')
title = self._og_search_title(webpage, default=None) title = self._og_search_title(webpage, default=None)

View File

@ -1,35 +1,50 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import functools
import time
import hashlib import hashlib
import json import json
import random import random
import re
import time
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from .youtube import YoutubeIE
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
OnDemandPagedList,
parse_age_limit, parse_age_limit,
str_or_none, str_or_none,
try_get, try_get,
) )
class ViceIE(AdobePassIE): class ViceBaseIE(InfoExtractor):
def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
    """Query the Vice GraphQL endpoint and return the requested resource.

    Builds a GraphQL document selecting ``fields`` from ``resource``,
    filtered by ``locale`` and ``resource_key: resource_id``. ``args`` is
    appended verbatim inside the argument list. Returns
    ``response['data'][resource]``.
    """
    graphql = '''{
%s(locale: "%s", %s: "%s"%s) {
%s
}
}''' % (resource, locale, resource_key, resource_id, args, fields)
    response = self._download_json(
        'https://video.vice.com/api/v1/graphql', resource_id,
        query={'query': graphql})
    return response['data'][resource]
class ViceIE(ViceBaseIE, AdobePassIE):
IE_NAME = 'vice' IE_NAME = 'vice'
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)' _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
_TESTS = [{ _TESTS = [{
'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7', 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
'info_dict': { 'info_dict': {
'id': '5e647f0125e145c9aef2069412c0cbde', 'id': '58c69e38a55424f1227dc3f7',
'ext': 'mp4', 'ext': 'mp4',
'title': '10 Questions You Always Wanted To Ask: Pet Cremator', 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5', 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
@ -43,17 +58,16 @@ class ViceIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
# geo restricted to US # geo restricted to US
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'info_dict': { 'info_dict': {
'id': '930c0ad1f47141cc955087eecaddb0e2', 'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4', 'ext': 'mp4',
'uploader': 'waypoint', 'uploader': 'vice',
'title': 'The Signal From Tölva', 'title': 'The Signal From Tölva',
'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
'uploader_id': '57f7d621e05ca860fa9ccaf9', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1477941983, 'timestamp': 1477941983,
'upload_date': '20161031', 'upload_date': '20161031',
}, },
@ -61,15 +75,14 @@ class ViceIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
'info_dict': { 'info_dict': {
'id': '581b12b60a0e1f4c0fb6ea2f', 'id': '581b12b60a0e1f4c0fb6ea2f',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>', 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
'uploader': 'VICE', 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1485368119, 'timestamp': 1485368119,
'upload_date': '20170125', 'upload_date': '20170125',
@ -78,9 +91,7 @@ class ViceIE(AdobePassIE):
'params': { 'params': {
# AES-encrypted m3u8 # AES-encrypted m3u8
'skip_download': True, 'skip_download': True,
'proxy': '127.0.0.1:8118',
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
'only_matching': True, 'only_matching': True,
@ -98,7 +109,7 @@ class ViceIE(AdobePassIE):
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
return re.findall( return re.findall(
r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)', r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
webpage) webpage)
@staticmethod @staticmethod
@ -109,31 +120,16 @@ class ViceIE(AdobePassIE):
def _real_extract(self, url): def _real_extract(self, url):
locale, video_id = re.match(self._VALID_URL, url).groups() locale, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage( video = self._call_api('videos', 'id', video_id, locale, '''body
'https://video.vice.com/%s/embed/%s' % (locale, video_id), locked
video_id) rating
thumbnail_url
video = self._parse_json( title''')[0]
self._search_regex( title = video['title'].strip()
r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
'app state'), video_id)['video']
video_id = video.get('vms_id') or video.get('id') or video_id
title = video['title']
is_locked = video.get('locked')
rating = video.get('rating') rating = video.get('rating')
thumbnail = video.get('thumbnail_url')
duration = int_or_none(video.get('duration'))
series = try_get(
video, lambda x: x['episode']['season']['show']['title'],
compat_str)
episode_number = try_get(
video, lambda x: x['episode']['episode_number'])
season_number = try_get(
video, lambda x: x['episode']['season']['season_number'])
uploader = None
query = {} query = {}
if is_locked: if video.get('locked'):
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
'VICELAND', title, video_id, rating) 'VICELAND', title, video_id, rating)
query['tvetoken'] = self._extract_mvpd_auth( query['tvetoken'] = self._extract_mvpd_auth(
@ -148,12 +144,9 @@ class ViceIE(AdobePassIE):
query.update({ query.update({
'exp': exp, 'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
'_ad_blocked': None, 'skipadstitching': 1,
'_ad_unit': '',
'_debug': '',
'platform': 'desktop', 'platform': 'desktop',
'rn': random.randint(10000, 100000), 'rn': random.randint(10000, 100000),
'fbprebidtoken': '',
}) })
try: try:
@ -169,85 +162,94 @@ class ViceIE(AdobePassIE):
raise raise
video_data = preplay['video'] video_data = preplay['video']
base = video_data['base'] formats = self._extract_m3u8_formats(
uplynk_preplay_url = preplay['preplayURL'] preplay['playURL'], video_id, 'mp4', 'm3u8_native')
episode = video_data.get('episode', {}) self._sort_formats(formats)
channel = video_data.get('channel', {}) episode = video_data.get('episode') or {}
channel = video_data.get('channel') or {}
season = video_data.get('season') or {}
subtitles = {} subtitles = {}
cc_url = preplay.get('ccURL') for subtitle in preplay.get('subtitleURLs', []):
if cc_url: cc_url = subtitle.get('url')
subtitles['en'] = [{ if not cc_url:
continue
language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
subtitles.setdefault(language_code, []).append({
'url': cc_url, 'url': cc_url,
}] })
return { return {
'_type': 'url_transparent', 'formats': formats,
'url': uplynk_preplay_url,
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': base.get('body') or base.get('display_body'), 'description': clean_html(video.get('body')),
'thumbnail': thumbnail, 'thumbnail': video.get('thumbnail_url'),
'duration': int_or_none(video_data.get('video_duration')) or duration, 'duration': int_or_none(video_data.get('video_duration')),
'timestamp': int_or_none(video_data.get('created_at'), 1000), 'timestamp': int_or_none(video_data.get('created_at'), 1000),
'age_limit': parse_age_limit(video_data.get('video_rating')), 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
'series': video_data.get('show_title') or series, 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
'episode_number': int_or_none(episode.get('episode_number') or episode_number), 'episode_number': int_or_none(episode.get('episode_number')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(season_number), 'season_number': int_or_none(season.get('season_number')),
'season_id': str_or_none(episode.get('season_id')), 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader, 'uploader': channel.get('name'),
'uploader_id': str_or_none(channel.get('id')), 'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles, 'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
} }
class ViceShowIE(InfoExtractor): class ViceShowIE(ViceBaseIE):
IE_NAME = 'vice:show' IE_NAME = 'vice:show'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
_PAGE_SIZE = 25
_TEST = { _TESTS = [{
'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2', 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
'info_dict': { 'info_dict': {
'id': 'fuck-thats-delicious-2', 'id': '57a2040c8cb727dec794c901',
'title': "Fuck, That's Delicious", 'title': 'F*ck, Thats Delicious',
'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.', 'description': 'The life and eating habits of raps greatest bon vivant, Action Bronson.',
}, },
'playlist_count': 17, 'playlist_mincount': 64,
} }, {
'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
'only_matching': True,
}]
def _fetch_page(self, locale, show_id, page):
    """Yield a url result for each video on one page of a show.

    Requests ``body``, ``id`` and ``url`` for the videos of ``show_id``.
    The page number sent to the API is ``page + 1`` and the page size is
    ``self._PAGE_SIZE``.
    """
    fields = '''body
id
url'''
    paging = ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE)
    page_videos = self._call_api(
        'videos', 'show_id', show_id, locale, fields, paging)
    for entry in page_videos:
        yield self.url_result(
            entry['url'], ViceIE.ie_key(), entry.get('id'))
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) locale, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, show_id) show = self._call_api('shows', 'slug', display_id, locale, '''dek
id
title''')[0]
show_id = show['id']
entries = [ entries = OnDemandPagedList(
self.url_result(video_url, ViceIE.ie_key()) functools.partial(self._fetch_page, locale, show_id),
for video_url, _ in re.findall( self._PAGE_SIZE)
r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
% ViceIE._VALID_URL, webpage)]
title = self._search_regex( return self.playlist_result(
r'<title>(.+?)</title>', webpage, 'title', default=None) entries, show_id, show.get('title'), show.get('dek'))
if title:
title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
description = self._html_search_meta(
'description', webpage, 'description')
return self.playlist_result(entries, show_id, title, description)
class ViceArticleIE(InfoExtractor): class ViceArticleIE(ViceBaseIE):
IE_NAME = 'vice:article' IE_NAME = 'vice:article'
_VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)' _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
'info_dict': { 'info_dict': {
'id': '41eae2a47b174a1398357cec55f1f6fc', 'id': '58dc0a3dee202d2a0ccfcbd8',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mormon War on Porn ', 'title': 'Mormon War on Porn',
'description': 'md5:6394a8398506581d0346b9ab89093fef', 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
'uploader': 'vice', 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1491883129, 'timestamp': 1491883129,
@ -258,10 +260,10 @@ class ViceArticleIE(InfoExtractor):
# AES-encrypted m3u8 # AES-encrypted m3u8
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'], 'add_ie': [ViceIE.ie_key()],
}, { }, {
'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
'md5': '7fe8ebc4fa3323efafc127b82bd821d9', 'md5': '13010ee0bc694ea87ec40724397c2349',
'info_dict': { 'info_dict': {
'id': '3jstaBeXgAs', 'id': '3jstaBeXgAs',
'ext': 'mp4', 'ext': 'mp4',
@ -271,15 +273,15 @@ class ViceArticleIE(InfoExtractor):
'uploader_id': 'MotherboardTV', 'uploader_id': 'MotherboardTV',
'upload_date': '20140529', 'upload_date': '20140529',
}, },
'add_ie': ['Youtube'], 'add_ie': [YoutubeIE.ie_key()],
}, { }, {
'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded', 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': { 'info_dict': {
'id': 'e2ed435eb67e43efb66e6ef9a6930a88', 'id': '57f41d3556a0a80f54726060',
'ext': 'mp4', 'ext': 'mp4',
'title': "Making The World's First Male Sex Doll", 'title': "Making The World's First Male Sex Doll",
'description': 'md5:916078ef0e032d76343116208b6cc2c4', 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
'uploader': 'vice', 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b', 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1476919911, 'timestamp': 1476919911,
@ -288,6 +290,7 @@ class ViceArticleIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'format': 'bestvideo',
}, },
'add_ie': [ViceIE.ie_key()], 'add_ie': [ViceIE.ie_key()],
}, { }, {
@ -299,14 +302,11 @@ class ViceArticleIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) locale, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) article = self._call_api('articles', 'slug', display_id, locale, '''body
embed_code''')[0]
prefetch_data = self._parse_json(self._search_regex( body = article['body']
r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
webpage, 'app state'), display_id)['pageData']
body = prefetch_data['body']
def _url_res(video_url, ie_key): def _url_res(video_url, ie_key):
return { return {
@ -316,7 +316,7 @@ class ViceArticleIE(InfoExtractor):
'ie_key': ie_key, 'ie_key': ie_key,
} }
vice_url = ViceIE._extract_url(webpage) vice_url = ViceIE._extract_url(body)
if vice_url: if vice_url:
return _url_res(vice_url, ViceIE.ie_key()) return _url_res(vice_url, ViceIE.ie_key())
@ -332,6 +332,6 @@ class ViceArticleIE(InfoExtractor):
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'data-video-url="([^"]+)"', r'data-video-url="([^"]+)"',
prefetch_data['embed_code'], 'video URL') article['embed_code'], 'video URL')
return _url_res(video_url, ViceIE.ie_key()) return _url_res(video_url, ViceIE.ie_key())

View File

@ -13,8 +13,7 @@ from ..utils import (
class WistiaIE(InfoExtractor): class WistiaIE(InfoExtractor):
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})' _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
_TESTS = [{ _TESTS = [{
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@ -67,10 +66,10 @@ class WistiaIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
data_json = self._download_json( data_json = self._download_json(
self._API_URL % video_id, video_id, self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
# Some videos require this. # Some videos require this.
headers={ headers={
'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
}) })
if data_json.get('error'): if data_json.get('error'):
@ -95,27 +94,61 @@ class WistiaIE(InfoExtractor):
'url': aurl, 'url': aurl,
'width': int_or_none(a.get('width')), 'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')), 'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')),
}) })
else: else:
aext = a.get('ext') aext = a.get('ext')
is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' display_name = a.get('display_name')
formats.append({ format_id = atype
'format_id': atype, if atype and atype.endswith('_video') and display_name:
format_id = '%s-%s' % (atype[:-6], display_name)
f = {
'format_id': format_id,
'url': aurl, 'url': aurl,
'tbr': int_or_none(a.get('bitrate')), 'tbr': int_or_none(a.get('bitrate')) or None,
'vbr': int_or_none(a.get('opt_vbitrate')),
'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')),
'vcodec': a.get('codec'),
'container': a.get('container'),
'ext': 'mp4' if is_m3u8 else aext,
'protocol': 'm3u8' if is_m3u8 else None,
'preference': 1 if atype == 'original' else None, 'preference': 1 if atype == 'original' else None,
}) }
if display_name == 'Audio':
f.update({
'vcodec': 'none',
})
else:
f.update({
'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')),
'vcodec': a.get('codec'),
})
if a.get('container') == 'm3u8' or aext == 'm3u8':
ts_f = f.copy()
ts_f.update({
'ext': 'ts',
'format_id': f['format_id'].replace('hls-', 'ts-'),
'url': f['url'].replace('.bin', '.ts'),
})
formats.append(ts_f)
f.update({
'ext': 'mp4',
'protocol': 'm3u8_native',
})
else:
f.update({
'container': a.get('container'),
'ext': aext,
'filesize': int_or_none(a.get('size')),
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for caption in data.get('captions', []):
language = caption.get('language')
if not language:
continue
subtitles[language] = [{
'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
}]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -124,4 +157,5 @@ class WistiaIE(InfoExtractor):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(data.get('duration')), 'duration': float_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('createdAt')), 'timestamp': int_or_none(data.get('createdAt')),
'subtitles': subtitles,
} }

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2019.12.25' __version__ = '2020.01.01'