diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 7cd4446b4..79ded6ba1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -36,7 +36,7 @@ class BBCCoUkIE(InfoExtractor): (?: programmes/(?!articles/)| iplayer(?:/[^/]+)?/(?:episode/|playlist/)| - music/clips[/#]| + music/(?:clips|audiovideo/popular)[/#]| radio/player/ ) (?P%s)(?!/(?:episodes|broadcasts|clips)) @@ -229,8 +229,10 @@ class BBCCoUkIE(InfoExtractor): }, { 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf', 'only_matching': True, - } - ] + }, { + 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55', + 'only_matching': True, + }] _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' @@ -523,6 +525,12 @@ class BBCCoUkIE(InfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') + error = self._search_regex( + r']+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + programme_id = None duration = None diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9751ab021..afeb4c5da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1002,17 +1002,17 @@ class InfoExtractor(object): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: return info - if item_type == 'TVEpisode': + if item_type in ('TVEpisode', 'Episode'): info.update({ 'episode': unescapeHTML(e.get('name')), 'episode_number': int_or_none(e.get('episodeNumber')), 'description': unescapeHTML(e.get('description')), }) part_of_season = e.get('partOfSeason') - if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': + if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'): info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') - if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': + if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) elif item_type == 'Article': info.update({ @@ -1022,10 +1022,10 @@ class InfoExtractor(object): }) elif item_type == 'VideoObject': extract_video_object(e) - elif item_type == 'WebPage': - video = e.get('video') - if isinstance(video, dict) and video.get('@type') == 'VideoObject': - extract_video_object(video) + continue + video = e.get('video') + if isinstance(video, dict) and video.get('@type') == 'VideoObject': + extract_video_object(video) break return dict((k, v) for k, v in info.items() if v is not None) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 87c5dd63e..1a41760f8 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor): webpage = self._download_webpage(url, display_id) info_url = self._search_regex( - r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', + r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', webpage, 'video id') title = remove_end(self._og_search_title(webpage), ' | Dplay') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2792ea3cf..f9bff433c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2048,6 +2048,13 @@ class GenericIE(InfoExtractor): video_description = self._og_search_description(webpage, default=None) video_thumbnail = self._og_search_thumbnail(webpage, default=None) + info_dict.update({ + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'age_limit': age_limit, + }) + # Look for Brightcove Legacy Studio embeds bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) if bc_urls: @@ -2684,18 +2691,26 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) + def merge_dicts(dict1, dict2): + merged = {} + for k, v in dict1.items(): + if v is not None: + merged[k] = v + for k, v in dict2.items(): + if v is None: + continue + if (k not in merged or + (isinstance(v, compat_str) and v and + isinstance(merged[k], compat_str) and + not merged[k])): + merged[k] = v + return merged + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') if json_ld.get('url'): - info_dict.update({ - 'title': video_title or info_dict['title'], - 'description': video_description, - 'thumbnail': video_thumbnail, - 'age_limit': age_limit - }) - info_dict.update(json_ld) - return info_dict + return merge_dicts(json_ld, info_dict) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') @@ -2713,9 +2728,7 @@ class GenericIE(InfoExtractor): if jwplayer_data: info = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=url) - if not info.get('title'): - info['title'] = video_title - return info + return merge_dicts(info, info_dict) def check_video(vurl): if YoutubeIE.suitable(vurl): diff --git a/youtube_dl/extractor/tastytrade.py b/youtube_dl/extractor/tastytrade.py new file mode 100644 index 000000000..7fe96bd5f --- /dev/null +++ b/youtube_dl/extractor/tastytrade.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class TastyTradeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P[^/?#&]+)' + + _TESTS = [{ + 'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017', + 'info_dict': { + 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', + 'ext': 'mp4', + 'title': 'A History of Teaming', + 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', + 'duration': 422.255, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + }, { + 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + ooyala_code = self._search_regex( + r'data-media-id=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'ooyala code', group='code') + + info = self._search_json_ld(webpage, display_id, fatal=False) + info.update({ + '_type': 'url_transparent', + 'ie_key': OoyalaIE.ie_key(), + 'url': 'ooyala:%s' % ooyala_code, + 'display_id': display_id, + }) + return info