diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 058eb4321..aefed163a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.10.05 +[debug] youtube-dl version 2018.10.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 86cf489b1..57dbde12d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version 2018.10.29 + +Core ++ [extractor/common] Add validation for JSON-LD URLs + +Extractors ++ [sportbox] Add support for matchtv.ru +* [sportbox] Fix extraction (#17978) +* [screencast] Fix extraction (#14590, #14617, #17990) ++ [openload] Add support for oload.icu ++ [ivi] Add support for ivi.tv +* [crunchyroll] Improve extraction failsafeness (#17991) +* [dailymail] Fix formats extraction (#17976) +* [viewster] Reduce format requests +* [cwtv] Handle API errors (#17905) ++ [rutube] Use geo verification headers (#17897) ++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912) +- [tv3] Remove extractor (#10461, #15339) +* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894) ++ [openload] Add support for oload.cc (#17823) ++ [patreon] Extract post_file URL (#17792) +* [patreon] Fix extraction (#14502, #10471) + + version 2018.10.05 Extractors diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f167a6ddc..e5a6879bc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -818,7 +818,7 @@ - **Spiegeltv** - **sport.francetvinfo.fr** - **Sport5** - - **SportBoxEmbed** + - **SportBox** - **SportDeutschland** - **SpringboardPlatform** - **Sprout** @@ -909,7 +909,6 @@ - **TV2** - **tv2.hu** - **TV2Article** - - **TV3** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **TVA** diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 6eb8bbb6e..883dcee7a 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + merge_dicts, mimetype2ext, url_or_none, ) @@ -12,59 +13,83 @@ from ..utils import ( class AparatIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.aparat.com/v/wP8On', 'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 'info_dict': { 'id': 'wP8On', 'ext': 'mp4', 'title': 'تیم گلکسی 11 - زومیت', - 'age_limit': 0, + 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028', + 'duration': 231, + 'timestamp': 1387394859, + 'upload_date': '20131218', + 'view_count': int, }, - # 'skip': 'Extremely unreliable', - } + }, { + # multiple formats + 'url': 'https://www.aparat.com/v/8dflw/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - # Note: There is an easier-to-parse configuration at - # http://www.aparat.com/video/video/config/videohash/%video_id - # but the URL in there does not work - webpage = self._download_webpage( - 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, - video_id) + # Provides more metadata + webpage = self._download_webpage(url, video_id, fatal=False) - title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') + if not webpage: + # Note: There is an easier-to-parse configuration at + # http://www.aparat.com/video/video/config/videohash/%video_id + # but the URL in there does not work + webpage = self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) - file_list = self._parse_json( + options = self._parse_json( self._search_regex( - r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, - 'file list'), + r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P(?:(?!\1).)+)\1\s*\)', + webpage, 'options', group='value'), video_id) + player = options['plugins']['sabaPlayerPlugin'] + formats = [] - for item in file_list[0]: - file_url = url_or_none(item.get('file')) - if not file_url: - continue - ext = mimetype2ext(item.get('type')) - label = item.get('label') - formats.append({ - 'url': file_url, - 'ext': ext, - 'format_id': label or ext, - 'height': int_or_none(self._search_regex( - r'(\d+)[pP]', label or '', 'height', default=None)), - }) - self._sort_formats(formats) + for sources in player['multiSRC']: + for item in sources: + if not isinstance(item, dict): + continue + file_url = url_or_none(item.get('src')) + if not file_url: + continue + item_type = item.get('type') + if item_type == 'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': 'http-%s' % (label or ext), + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', + default=None)), + }) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) - thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + info = self._search_json_ld(webpage, video_id, default={}) - return { + if not info.get('title'): + info['title'] = player['title'] + + return merge_dicts(info, { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'age_limit': self._family_friendly_search(webpage), + 'thumbnail': url_or_none(options.get('poster')), + 'duration': int_or_none(player.get('duration')), 'formats': formats, - } + }) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index d354d9f95..6889b0f40 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals + from .common import InfoExtractor from ..utils import smuggle_url @@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor): {'force_smil_url': True}), 'id': video_id, } + + +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P[^./?#&]+)' + _TEST = { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000031301', + 'ext': 'mp4', + 'title': "Trump: I don't necessarily agree with raising rates", + 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', + 'timestamp': 1531958400, + 'upload_date': '20180719', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, + 'video id') + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%s' % video_id, + CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2dbf81e6e..8452125c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,6 +69,7 @@ from ..utils import ( update_url_query, urljoin, url_basename, + url_or_none, xpath_element, xpath_text, xpath_with_ns, @@ -1213,10 +1214,10 @@ class InfoExtractor(object): def extract_video_object(e): assert e['@type'] == 'VideoObject' info.update({ - 'url': e.get('contentUrl'), + 'url': url_or_none(e.get('contentUrl')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), + 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), 'filesize': float_or_none(e.get('contentSize')), diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 045be0ab5..4a68d092b 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re import json +import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -398,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if sub_doc is None: + if not isinstance(sub_doc, etree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -515,7 +516,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if streamdata is not None: + if isinstance(streamdata, etree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -526,7 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if stream_info is not None: + if isinstance(stream_info, etree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -598,10 +599,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text series = self._html_search_regex( r'(?s)]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)]+id=["\']showmedia_about_episode_num[^>]+>.+?\s*

\s*Season (\d+)', @@ -611,8 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'id': video_id, 'title': video_title, 'description': video_description, - 'duration': float_or_none(media_metadata.get('duration'), 1000), - 'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), + 'duration': duration, + 'thumbnail': thumbnail, 'uploader': video_uploader, 'upload_date': video_upload_date, 'series': series, diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py index af3978035..4f75a2a30 100644 --- a/youtube_dl/extractor/dailymail.py +++ b/youtube_dl/extractor/dailymail.py @@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor): 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id) video_sources = self._download_json(sources_url, video_id) + body = video_sources.get('body') + if body: + video_sources = body formats = [] for rendition in video_sources['renditions']: diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 17b576df3..8879f5d90 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -209,7 +209,10 @@ from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import CNBCIE +from .cnbc import ( + CNBCIE, + CNBCVideoIE, +) from .cnn import ( CNNIE, CNNBlogsIE, @@ -569,6 +572,10 @@ from .limelight import ( LimelightChannelListIE, ) from .line import LineTVIE +from .linkedin import ( + LinkedInLearningIE, + LinkedInLearningCourseIE, +) from .litv import LiTVIE from .liveleak import ( LiveLeakIE, @@ -1043,7 +1050,7 @@ from .spike import ( ) from .stitcher import StitcherIE from .sport5 import Sport5IE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a48667f0..545e03371 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .tvc import TVCIE -from .sportbox import SportBoxEmbedIE +from .sportbox import SportBoxIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE @@ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor): return self.url_result(tvc_url, 'TVC') # Look for embedded SportBox player - sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) + sportbox_urls = SportBoxIE._extract_urls(webpage) if sportbox_urls: - return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') + return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index cb51cef2d..86c014b07 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -15,7 +15,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _GEO_BYPASS = False _GEO_COUNTRIES = ['RU'] @@ -65,7 +65,11 @@ class IviIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', - } + }, + { + 'url': 'https://www.ivi.tv/watch/33560/', + 'only_matching': True, + }, ] # Sorted by quality diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py new file mode 100644 index 000000000..259fc4c5e --- /dev/null +++ b/youtube_dl/extractor/linkedin.py @@ -0,0 +1,175 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + urlencode_postdata, +) + + +class LinkedInLearningBaseIE(InfoExtractor): + _NETRC_MACHINE = 'linkedin' + + def _call_api(self, course_slug, fields, video_slug=None, resolution=None): + query = { + 'courseSlug': course_slug, + 'fields': fields, + 'q': 'slugs', + } + sub = '' + if video_slug: + query.update({ + 'videoSlug': video_slug, + 'resolution': '_%s' % resolution, + }) + sub = ' %dp' % resolution + api_url = 'https://www.linkedin.com/learning-api/detailedCourses' + return self._download_json( + api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ + 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, + }, query=query)['elements'][0] + + def _get_video_id(self, urn, course_slug, video_slug): + if urn: + mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn) + if mobj: + return mobj.group(1) + return '%s/%s' % (course_slug, video_slug) + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + + login_page = self._download_webpage( + 'https://www.linkedin.com/uas/login?trk=learning', + None, 'Downloading login page') + action_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', login_page, 'post url', + default='https://www.linkedin.com/uas/login-submit', group='url') + data = self._hidden_inputs(login_page) + data.update({ + 'session_key': email, + 'session_password': password, + }) + login_submit_page = self._download_webpage( + action_url, None, 'Logging in', + data=urlencode_postdata(data)) + error = self._search_regex( + r']+class="error"[^>]*>\s*(.+?)\s*', + login_submit_page, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + + +class LinkedInLearningIE(LinkedInLearningBaseIE): + IE_NAME = 'linkedin:learning' + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P[^/]+)/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true', + 'md5': 'a1d74422ff0d5e66a792deb996693167', + 'info_dict': { + 'id': '90426', + 'ext': 'mp4', + 'title': 'Welcome', + 'timestamp': 1430396150.82, + 'upload_date': '20150430', + }, + } + + def _real_extract(self, url): + course_slug, video_slug = re.match(self._VALID_URL, url).groups() + + video_data = None + formats = [] + for width, height in ((640, 360), (960, 540), (1280, 720)): + video_data = self._call_api( + course_slug, 'selectedVideo', video_slug, height)['selectedVideo'] + + video_url_data = video_data.get('url') or {} + progressive_url = video_url_data.get('progressiveUrl') + if progressive_url: + formats.append({ + 'format_id': 'progressive-%dp' % height, + 'url': progressive_url, + 'height': height, + 'width': width, + 'source_preference': 1, + }) + + title = video_data['title'] + + audio_url = video_data.get('audio', {}).get('progressiveUrl') + if audio_url: + formats.append({ + 'abr': 64, + 'ext': 'm4a', + 'format_id': 'audio', + 'url': audio_url, + 'vcodec': 'none', + }) + + streaming_url = video_url_data.get('streamingUrl') + if streaming_url: + formats.extend(self._extract_m3u8_formats( + streaming_url, video_slug, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + + self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) + + return { + 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug), + 'title': title, + 'formats': formats, + 'thumbnail': video_data.get('defaultThumbnail'), + 'timestamp': float_or_none(video_data.get('publishedOn'), 1000), + 'duration': int_or_none(video_data.get('durationInSeconds')), + } + + +class LinkedInLearningCourseIE(LinkedInLearningBaseIE): + IE_NAME = 'linkedin:learning:course' + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P[^/?#]+)' + _TEST = { + 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals', + 'info_dict': { + 'id': 'programming-foundations-fundamentals', + 'title': 'Programming Foundations: Fundamentals', + 'description': 'md5:76e580b017694eb89dc8e8923fff5c86', + }, + 'playlist_mincount': 61, + } + + @classmethod + def suitable(cls, url): + return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_slug = self._match_id(url) + course_data = self._call_api(course_slug, 'chapters,description,title') + + entries = [] + for chapter in course_data.get('chapters', []): + chapter_title = chapter.get('title') + for video in chapter.get('videos', []): + video_slug = video.get('slug') + if not video_slug: + continue + entries.append({ + '_type': 'url_transparent', + 'id': self._get_video_id(video.get('urn'), course_slug, video_slug), + 'title': video.get('title'), + 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), + 'chapter': chapter_title, + 'ie_key': LinkedInLearningIE.ie_key(), + }) + + return self.playlist_result( + entries, course_slug, + course_data.get('title'), + course_data.get('description')) diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py index febef097a..025c5d249 100644 --- a/youtube_dl/extractor/njpwworld.py +++ b/youtube_dl/extractor/njpwworld.py @@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor): 'skip': 'Requires login', } + _LOGIN_URL = 'https://front.njpwworld.com/auth/login' + def _real_initialize(self): self._login() @@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor): if not username: return True + # Setup session (will set necessary cookies) + self._request_webpage( + 'https://njpwworld.com/', None, note='Setting up session') + webpage, urlh = self._download_webpage_handle( - 'https://njpwworld.com/auth/login', None, + self._LOGIN_URL, None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({'login_id': username, 'pw': password}), - headers={'Referer': 'https://njpwworld.com/auth'}) + headers={'Referer': 'https://front.njpwworld.com/auth'}) # /auth/login will return 302 for successful logins - if urlh.geturl() == 'https://njpwworld.com/auth/login': + if urlh.geturl() == self._LOGIN_URL: self.report_warning('unable to login') return False diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c652603a5..a91f29f5c 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -243,7 +243,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -317,6 +317,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.cc/embed/5NEAbI2BDSk', 'only_matching': True, + }, { + 'url': 'https://oload.icu/f/-_i4y_F_Hs8', + 'only_matching': True }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 62a6a8337..69a0d01f3 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -90,6 +90,15 @@ class ScreencastIE(InfoExtractor): r'src=(.*?)(?:$|&)', video_meta, 'meta tag video URL', default=None) + if video_url is None: + video_url = self._html_search_regex( + r'MediaContentUrl["\']\s*:(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'video url', default=None, group='url') + + if video_url is None: + video_url = self._html_search_meta( + 'og:video', webpage, default=None) + if video_url is None: raise ExtractorError('Cannot find video') diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 54497c880..b9017fd2a 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -8,20 +8,24 @@ from ..utils import ( determine_ext, int_or_none, js_to_json, + merge_dicts, ) -class SportBoxEmbedIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' +class SportBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' _TESTS = [{ 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', 'info_dict': { - 'id': '211355', + 'id': '109158', 'ext': 'mp4', - 'title': '211355', + 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 292, 'view_count': int, + 'timestamp': 1426237001, + 'upload_date': '20150313', }, 'params': { # m3u8 download @@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor): }, { 'url': 'https://news.sportbox.ru/vdl/player/media/193095', 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/109158', + 'only_matching': True, + }, { + 'url': 'https://matchtv.ru/vdl/player/media/109158', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return re.findall( - r']+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', + r']+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', webpage) def _real_extract(self, url): @@ -46,13 +56,14 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - wjplayer_data = self._parse_json( + sources = self._parse_json( self._search_regex( - r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'), + r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', + webpage, 'sources'), video_id, transform_source=js_to_json) formats = [] - for source in wjplayer_data['sources']: + for source in sources: src = source.get('src') if not src: continue @@ -66,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor): }) self._sort_formats(formats) + player = self._parse_json( + self._search_regex( + r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage, + 'player options', default='{}'), + video_id, transform_source=js_to_json) + media_id = player['mediaId'] + + info = self._search_json_ld(webpage, media_id, default={}) + view_count = int_or_none(self._search_regex( r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) - return { - 'id': video_id, - 'title': video_id, - 'thumbnail': wjplayer_data.get('poster'), - 'duration': int_or_none(wjplayer_data.get('duration')), + return merge_dicts(info, { + 'id': media_id, + 'title': self._og_search_title(webpage, default=None) or media_id, + 'thumbnail': player.get('poster'), + 'duration': int_or_none(player.get('duration')), 'view_count': view_count, 'formats': formats, - } + }) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index ffef5bf06..181620615 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE): smil_url, video_id, note=note, query={'format': 'SMIL'}, headers=self.geo_verification_headers()) error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src') - if error_element is not None and error_element.attrib['src'].startswith( - 'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD): - raise ExtractorError(error_element.attrib['abstract'], expected=True) + if error_element is not None: + exception = find_xpath_attr( + error_element, _x('.//smil:param'), 'name', 'exception') + if exception is not None: + if exception.get('value') == 'GeoLocationBlocked': + self.raise_geo_restricted(error_element.attrib['abstract']) + elif error_element.attrib['src'].startswith( + 'http://link.theplatform.%s/s/errorFiles/Unavailable.' + % self._TP_TLD): + raise ExtractorError( + error_element.attrib['abstract'], expected=True) smil_formats = self._parse_smil_formats( meta, smil_url, video_id, namespace=default_ns, diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index d5d5b4c69..6e318479c 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor): def concat(suffix, sep='-'): return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix - for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): - media = self._download_json( - 'https://public-api.viewster.com/movies/%s/video' % entry_id, - video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={ - 'mediaType': media_type, - 'language': audio, - 'subtitle': subtitle, - }) - if not media: - continue + medias = self._download_json( + 'https://public-api.viewster.com/movies/%s/videos' % entry_id, + video_id, fatal=False, query={ + 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'], + 'language': audio, + 'subtitle': subtitle, + }) + if not medias: + continue + for media in medias: video_url = media.get('Uri') if not video_url: continue diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7d3f25019..ae9a77966 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.10.05' +__version__ = '2018.10.29'