Merge branch 'master' into PornHub-issue-16078

This commit is contained in:
Parmjit Virk 2018-11-01 14:29:34 -05:00
commit 5e6f0731b1
19 changed files with 416 additions and 89 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.10.05 [debug] youtube-dl version 2018.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,27 @@
version 2018.10.29
Core
+ [extractor/common] Add validation for JSON-LD URLs
Extractors
+ [sportbox] Add support for matchtv.ru
* [sportbox] Fix extraction (#17978)
* [screencast] Fix extraction (#14590, #14617, #17990)
+ [openload] Add support for oload.icu
+ [ivi] Add support for ivi.tv
* [crunchyroll] Improve extraction failsafeness (#17991)
* [dailymail] Fix formats extraction (#17976)
* [viewster] Reduce format requests
* [cwtv] Handle API errors (#17905)
+ [rutube] Use geo verification headers (#17897)
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
- [tv3] Remove extractor (#10461, #15339)
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
+ [openload] Add support for oload.cc (#17823)
+ [patreon] Extract post_file URL (#17792)
* [patreon] Fix extraction (#14502, #10471)
version 2018.10.05 version 2018.10.05
Extractors Extractors

View File

@ -818,7 +818,7 @@
- **Spiegeltv** - **Spiegeltv**
- **sport.francetvinfo.fr** - **sport.francetvinfo.fr**
- **Sport5** - **Sport5**
- **SportBoxEmbed** - **SportBox**
- **SportDeutschland** - **SportDeutschland**
- **SpringboardPlatform** - **SpringboardPlatform**
- **Sprout** - **Sprout**
@ -909,7 +909,6 @@
- **TV2** - **TV2**
- **tv2.hu** - **tv2.hu**
- **TV2Article** - **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se - **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+ - **TV5MondePlus**: TV5MONDE+
- **TVA** - **TVA**

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
merge_dicts,
mimetype2ext, mimetype2ext,
url_or_none, url_or_none,
) )
@ -12,59 +13,83 @@ from ..utils import (
class AparatIE(InfoExtractor): class AparatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.aparat.com/v/wP8On', 'url': 'http://www.aparat.com/v/wP8On',
'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
'info_dict': { 'info_dict': {
'id': 'wP8On', 'id': 'wP8On',
'ext': 'mp4', 'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت', 'title': 'تیم گلکسی 11 - زومیت',
'age_limit': 0, 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
'duration': 231,
'timestamp': 1387394859,
'upload_date': '20131218',
'view_count': int,
}, },
# 'skip': 'Extremely unreliable', }, {
} # multiple formats
'url': 'https://www.aparat.com/v/8dflw/',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
# Note: There is an easier-to-parse configuration at # Provides more metadata
# http://www.aparat.com/video/video/config/videohash/%video_id webpage = self._download_webpage(url, video_id, fatal=False)
# but the URL in there does not work
webpage = self._download_webpage(
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
video_id)
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') if not webpage:
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
webpage = self._download_webpage(
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
video_id)
file_list = self._parse_json( options = self._parse_json(
self._search_regex( self._search_regex(
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
'file list'), webpage, 'options', group='value'),
video_id) video_id)
player = options['plugins']['sabaPlayerPlugin']
formats = [] formats = []
for item in file_list[0]: for sources in player['multiSRC']:
file_url = url_or_none(item.get('file')) for item in sources:
if not file_url: if not isinstance(item, dict):
continue continue
ext = mimetype2ext(item.get('type')) file_url = url_or_none(item.get('src'))
label = item.get('label') if not file_url:
formats.append({ continue
'url': file_url, item_type = item.get('type')
'ext': ext, if item_type == 'application/vnd.apple.mpegurl':
'format_id': label or ext, formats.extend(self._extract_m3u8_formats(
'height': int_or_none(self._search_regex( file_url, video_id, 'mp4',
r'(\d+)[pP]', label or '', 'height', default=None)), entry_protocol='m3u8_native', m3u8_id='hls',
}) fatal=False))
self._sort_formats(formats) else:
ext = mimetype2ext(item.get('type'))
label = item.get('label')
formats.append({
'url': file_url,
'ext': ext,
'format_id': 'http-%s' % (label or ext),
'height': int_or_none(self._search_regex(
r'(\d+)[pP]', label or '', 'height',
default=None)),
})
self._sort_formats(
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
thumbnail = self._search_regex( info = self._search_json_ld(webpage, video_id, default={})
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return { if not info.get('title'):
info['title'] = player['title']
return merge_dicts(info, {
'id': video_id, 'id': video_id,
'title': title, 'thumbnail': url_or_none(options.get('poster')),
'thumbnail': thumbnail, 'duration': int_or_none(player.get('duration')),
'age_limit': self._family_friendly_search(webpage),
'formats': formats, 'formats': formats,
} })

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import smuggle_url
@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
{'force_smil_url': True}), {'force_smil_url': True}),
'id': video_id, 'id': video_id,
} }
class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': {
'id': '7000031301',
'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates",
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
'timestamp': 1531958400,
'upload_date': '20180719',
'uploader': 'NBCU-CNBC',
},
'params': {
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
'video id')
return self.url_result(
'http://video.cnbc.com/gallery/?video=%s' % video_id,
CNBCIE.ie_key())

View File

@ -69,6 +69,7 @@ from ..utils import (
update_url_query, update_url_query,
urljoin, urljoin,
url_basename, url_basename,
url_or_none,
xpath_element, xpath_element,
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
@ -1213,10 +1214,10 @@ class InfoExtractor(object):
def extract_video_object(e): def extract_video_object(e):
assert e['@type'] == 'VideoObject' assert e['@type'] == 'VideoObject'
info.update({ info.update({
'url': e.get('contentUrl'), 'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')), 'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')), 'description': unescapeHTML(e.get('description')),
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')), 'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')), 'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')), 'filesize': float_or_none(e.get('contentSize')),

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re import re
import json import json
import xml.etree.ElementTree as etree
import zlib import zlib
from hashlib import sha1 from hashlib import sha1
@ -398,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'Downloading subtitles for ' + sub_name, data={ 'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id, 'subtitle_script_id': sub_id,
}) })
if sub_doc is None: if not isinstance(sub_doc, etree.Element):
continue continue
sid = sub_doc.get('id') sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv') iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
@ -515,7 +516,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_quality': stream_quality, 'video_quality': stream_quality,
'current_page': url, 'current_page': url,
}) })
if streamdata is not None: if isinstance(streamdata, etree.Element):
stream_info = streamdata.find('./{default}preload/stream_info') stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None: if stream_info is not None:
stream_infos.append(stream_info) stream_infos.append(stream_info)
@ -526,7 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_format': stream_format, 'video_format': stream_format,
'video_encode_quality': stream_quality, 'video_encode_quality': stream_quality,
}) })
if stream_info is not None: if isinstance(stream_info, etree.Element):
stream_infos.append(stream_info) stream_infos.append(stream_info)
for stream_info in stream_infos: for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id') video_encode_id = xpath_text(stream_info, './video_encode_id')
@ -598,10 +599,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
series = self._html_search_regex( series = self._html_search_regex(
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False) webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title') season = episode = episode_number = duration = thumbnail = None
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
if isinstance(metadata, etree.Element):
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
duration = float_or_none(media_metadata.get('duration'), 1000)
thumbnail = xpath_text(metadata, 'episode_image_url')
if not episode:
episode = media_metadata.get('title')
if not episode_number:
episode_number = int_or_none(media_metadata.get('episode_number'))
if not thumbnail:
thumbnail = media_metadata.get('thumbnail', {}).get('url')
season_number = int_or_none(self._search_regex( season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@ -611,8 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'description': video_description, 'description': video_description,
'duration': float_or_none(media_metadata.get('duration'), 1000), 'duration': duration,
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), 'thumbnail': thumbnail,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'series': series, 'series': series,

View File

@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id) 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
video_sources = self._download_json(sources_url, video_id) video_sources = self._download_json(sources_url, video_id)
body = video_sources.get('body')
if body:
video_sources = body
formats = [] formats = []
for rendition in video_sources['renditions']: for rendition in video_sources['renditions']:

View File

@ -209,7 +209,10 @@ from .cloudy import CloudyIE
from .clubic import ClubicIE from .clubic import ClubicIE
from .clyp import ClypIE from .clyp import ClypIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnbc import CNBCIE from .cnbc import (
CNBCIE,
CNBCVideoIE,
)
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNBlogsIE, CNNBlogsIE,
@ -569,6 +572,10 @@ from .limelight import (
LimelightChannelListIE, LimelightChannelListIE,
) )
from .line import LineTVIE from .line import LineTVIE
from .linkedin import (
LinkedInLearningIE,
LinkedInLearningCourseIE,
)
from .litv import LiTVIE from .litv import LiTVIE
from .liveleak import ( from .liveleak import (
LiveLeakIE, LiveLeakIE,
@ -1043,7 +1050,7 @@ from .spike import (
) )
from .stitcher import StitcherIE from .stitcher import StitcherIE
from .sport5 import Sport5IE from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE

View File

@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
from .tvc import TVCIE from .tvc import TVCIE
from .sportbox import SportBoxEmbedIE from .sportbox import SportBoxIE
from .smotri import SmotriIE from .smotri import SmotriIE
from .myvi import MyviIE from .myvi import MyviIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
@ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor):
return self.url_result(tvc_url, 'TVC') return self.url_result(tvc_url, 'TVC')
# Look for embedded SportBox player # Look for embedded SportBox player
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) sportbox_urls = SportBoxIE._extract_urls(webpage)
if sportbox_urls: if sportbox_urls:
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
# Look for embedded XHamster player # Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)

View File

@ -15,7 +15,7 @@ from ..utils import (
class IviIE(InfoExtractor): class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru' IE_DESC = 'ivi.ru'
IE_NAME = 'ivi' IE_NAME = 'ivi'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['RU'] _GEO_COUNTRIES = ['RU']
@ -65,7 +65,11 @@ class IviIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
}, },
'skip': 'Only works from Russia', 'skip': 'Only works from Russia',
} },
{
'url': 'https://www.ivi.tv/watch/33560/',
'only_matching': True,
},
] ]
# Sorted by quality # Sorted by quality

View File

@ -0,0 +1,175 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
urlencode_postdata,
)
class LinkedInLearningBaseIE(InfoExtractor):
_NETRC_MACHINE = 'linkedin'
def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
query = {
'courseSlug': course_slug,
'fields': fields,
'q': 'slugs',
}
sub = ''
if video_slug:
query.update({
'videoSlug': video_slug,
'resolution': '_%s' % resolution,
})
sub = ' %dp' % resolution
api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
return self._download_json(
api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
}, query=query)['elements'][0]
def _get_video_id(self, urn, course_slug, video_slug):
if urn:
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
if mobj:
return mobj.group(1)
return '%s/%s' % (course_slug, video_slug)
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
login_page = self._download_webpage(
'https://www.linkedin.com/uas/login?trk=learning',
None, 'Downloading login page')
action_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
default='https://www.linkedin.com/uas/login-submit', group='url')
data = self._hidden_inputs(login_page)
data.update({
'session_key': email,
'session_password': password,
})
login_submit_page = self._download_webpage(
action_url, None, 'Logging in',
data=urlencode_postdata(data))
error = self._search_regex(
r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
login_submit_page, 'error', default=None)
if error:
raise ExtractorError(error, expected=True)
class LinkedInLearningIE(LinkedInLearningBaseIE):
IE_NAME = 'linkedin:learning'
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
_TEST = {
'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
'md5': 'a1d74422ff0d5e66a792deb996693167',
'info_dict': {
'id': '90426',
'ext': 'mp4',
'title': 'Welcome',
'timestamp': 1430396150.82,
'upload_date': '20150430',
},
}
def _real_extract(self, url):
course_slug, video_slug = re.match(self._VALID_URL, url).groups()
video_data = None
formats = []
for width, height in ((640, 360), (960, 540), (1280, 720)):
video_data = self._call_api(
course_slug, 'selectedVideo', video_slug, height)['selectedVideo']
video_url_data = video_data.get('url') or {}
progressive_url = video_url_data.get('progressiveUrl')
if progressive_url:
formats.append({
'format_id': 'progressive-%dp' % height,
'url': progressive_url,
'height': height,
'width': width,
'source_preference': 1,
})
title = video_data['title']
audio_url = video_data.get('audio', {}).get('progressiveUrl')
if audio_url:
formats.append({
'abr': 64,
'ext': 'm4a',
'format_id': 'audio',
'url': audio_url,
'vcodec': 'none',
})
streaming_url = video_url_data.get('streamingUrl')
if streaming_url:
formats.extend(self._extract_m3u8_formats(
streaming_url, video_slug, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
return {
'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
'title': title,
'formats': formats,
'thumbnail': video_data.get('defaultThumbnail'),
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
'duration': int_or_none(video_data.get('durationInSeconds')),
}
class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
IE_NAME = 'linkedin:learning:course'
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
_TEST = {
'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
'info_dict': {
'id': 'programming-foundations-fundamentals',
'title': 'Programming Foundations: Fundamentals',
'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
},
'playlist_mincount': 61,
}
@classmethod
def suitable(cls, url):
return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)
def _real_extract(self, url):
course_slug = self._match_id(url)
course_data = self._call_api(course_slug, 'chapters,description,title')
entries = []
for chapter in course_data.get('chapters', []):
chapter_title = chapter.get('title')
for video in chapter.get('videos', []):
video_slug = video.get('slug')
if not video_slug:
continue
entries.append({
'_type': 'url_transparent',
'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
'title': video.get('title'),
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
'chapter': chapter_title,
'ie_key': LinkedInLearningIE.ie_key(),
})
return self.playlist_result(
entries, course_slug,
course_data.get('title'),
course_data.get('description'))

View File

@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
'skip': 'Requires login', 'skip': 'Requires login',
} }
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor):
if not username: if not username:
return True return True
# Setup session (will set necessary cookies)
self._request_webpage(
'https://njpwworld.com/', None, note='Setting up session')
webpage, urlh = self._download_webpage_handle( webpage, urlh = self._download_webpage_handle(
'https://njpwworld.com/auth/login', None, self._LOGIN_URL, None,
note='Logging in', errnote='Unable to login', note='Logging in', errnote='Unable to login',
data=urlencode_postdata({'login_id': username, 'pw': password}), data=urlencode_postdata({'login_id': username, 'pw': password}),
headers={'Referer': 'https://njpwworld.com/auth'}) headers={'Referer': 'https://front.njpwworld.com/auth'})
# /auth/login will return 302 for successful logins # /auth/login will return 302 for successful logins
if urlh.geturl() == 'https://njpwworld.com/auth/login': if urlh.geturl() == self._LOGIN_URL:
self.report_warning('unable to login') self.report_warning('unable to login')
return False return False

View File

@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor): class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o', 'url': 'https://openload.co/f/kUEfGclsU9o',
@ -317,6 +317,9 @@ class OpenloadIE(InfoExtractor):
}, { }, {
'url': 'https://oload.cc/embed/5NEAbI2BDSk', 'url': 'https://oload.cc/embed/5NEAbI2BDSk',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://oload.icu/f/-_i4y_F_Hs8',
'only_matching': True
}] }]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'

View File

@ -90,6 +90,15 @@ class ScreencastIE(InfoExtractor):
r'src=(.*?)(?:$|&)', video_meta, r'src=(.*?)(?:$|&)', video_meta,
'meta tag video URL', default=None) 'meta tag video URL', default=None)
if video_url is None:
video_url = self._html_search_regex(
r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video url', default=None, group='url')
if video_url is None:
video_url = self._html_search_meta(
'og:video', webpage, default=None)
if video_url is None: if video_url is None:
raise ExtractorError('Cannot find video') raise ExtractorError('Cannot find video')

View File

@ -8,20 +8,24 @@ from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts,
) )
class SportBoxEmbedIE(InfoExtractor): class SportBoxIE(InfoExtractor):
_VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
'info_dict': { 'info_dict': {
'id': '211355', 'id': '109158',
'ext': 'mp4', 'ext': 'mp4',
'title': '211355', 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 292, 'duration': 292,
'view_count': int, 'view_count': int,
'timestamp': 1426237001,
'upload_date': '20150313',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor):
}, { }, {
'url': 'https://news.sportbox.ru/vdl/player/media/193095', 'url': 'https://news.sportbox.ru/vdl/player/media/193095',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://news.sportbox.ru/vdl/player/media/109158',
'only_matching': True,
}, {
'url': 'https://matchtv.ru/vdl/player/media/109158',
'only_matching': True,
}] }]
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
return re.findall( return re.findall(
r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
webpage) webpage)
def _real_extract(self, url): def _real_extract(self, url):
@ -46,13 +56,14 @@ class SportBoxEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
wjplayer_data = self._parse_json( sources = self._parse_json(
self._search_regex( self._search_regex(
r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
webpage, 'sources'),
video_id, transform_source=js_to_json) video_id, transform_source=js_to_json)
formats = [] formats = []
for source in wjplayer_data['sources']: for source in sources:
src = source.get('src') src = source.get('src')
if not src: if not src:
continue continue
@ -66,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
player = self._parse_json(
self._search_regex(
r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
'player options', default='{}'),
video_id, transform_source=js_to_json)
media_id = player['mediaId']
info = self._search_json_ld(webpage, media_id, default={})
view_count = int_or_none(self._search_regex( view_count = int_or_none(self._search_regex(
r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
return { return merge_dicts(info, {
'id': video_id, 'id': media_id,
'title': video_id, 'title': self._og_search_title(webpage, default=None) or media_id,
'thumbnail': wjplayer_data.get('poster'), 'thumbnail': player.get('poster'),
'duration': int_or_none(wjplayer_data.get('duration')), 'duration': int_or_none(player.get('duration')),
'view_count': view_count, 'view_count': view_count,
'formats': formats, 'formats': formats,
} })

View File

@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE):
smil_url, video_id, note=note, query={'format': 'SMIL'}, smil_url, video_id, note=note, query={'format': 'SMIL'},
headers=self.geo_verification_headers()) headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith( if error_element is not None:
'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD): exception = find_xpath_attr(
raise ExtractorError(error_element.attrib['abstract'], expected=True) error_element, _x('.//smil:param'), 'name', 'exception')
if exception is not None:
if exception.get('value') == 'GeoLocationBlocked':
self.raise_geo_restricted(error_element.attrib['abstract'])
elif error_element.attrib['src'].startswith(
'http://link.theplatform.%s/s/errorFiles/Unavailable.'
% self._TP_TLD):
raise ExtractorError(
error_element.attrib['abstract'], expected=True)
smil_formats = self._parse_smil_formats( smil_formats = self._parse_smil_formats(
meta, smil_url, video_id, namespace=default_ns, meta, smil_url, video_id, namespace=default_ns,

View File

@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor):
def concat(suffix, sep='-'): def concat(suffix, sep='-'):
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): medias = self._download_json(
media = self._download_json( 'https://public-api.viewster.com/movies/%s/videos' % entry_id,
'https://public-api.viewster.com/movies/%s/video' % entry_id, video_id, fatal=False, query={
video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={ 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
'mediaType': media_type, 'language': audio,
'language': audio, 'subtitle': subtitle,
'subtitle': subtitle, })
}) if not medias:
if not media: continue
continue for media in medias:
video_url = media.get('Uri') video_url = media.get('Uri')
if not video_url: if not video_url:
continue continue

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.10.05' __version__ = '2018.10.29'