Merge branch 'master' into BlenderCloud-issue-13282

This commit is contained in:
Parmjit Virk 2017-06-30 19:28:10 -05:00
commit 6a31a87f38
5 changed files with 86 additions and 22 deletions

View File

@ -36,7 +36,7 @@ class BBCCoUkIE(InfoExtractor):
(?:
programmes/(?!articles/)|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/clips[/#]|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@ -229,8 +229,10 @@ class BBCCoUkIE(InfoExtractor):
}, {
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
'only_matching': True,
}
]
}, {
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
'only_matching': True,
}]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@ -523,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page')
error = self._search_regex(
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
webpage, 'error', default=None)
if error:
raise ExtractorError(error, expected=True)
programme_id = None
duration = None

View File

@ -1002,17 +1002,17 @@ class InfoExtractor(object):
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
return info
if item_type == 'TVEpisode':
if item_type in ('TVEpisode', 'Episode'):
info.update({
'episode': unescapeHTML(e.get('name')),
'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')),
})
part_of_season = e.get('partOfSeason')
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article':
info.update({
@ -1022,10 +1022,10 @@ class InfoExtractor(object):
})
elif item_type == 'VideoObject':
extract_video_object(e)
elif item_type == 'WebPage':
video = e.get('video')
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
extract_video_object(video)
continue
video = e.get('video')
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
extract_video_object(video)
break
return dict((k, v) for k, v in info.items() if v is not None)

View File

@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
info_url = self._search_regex(
r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
webpage, 'video id')
title = remove_end(self._og_search_title(webpage), ' | Dplay')

View File

@ -2048,6 +2048,13 @@ class GenericIE(InfoExtractor):
video_description = self._og_search_description(webpage, default=None)
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
info_dict.update({
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
'age_limit': age_limit,
})
# Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
@ -2684,18 +2691,26 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
if v is not None:
merged[k] = v
for k, v in dict2.items():
if v is None:
continue
if (k not in merged or
(isinstance(v, compat_str) and v and
isinstance(merged[k], compat_str) and
not merged[k])):
merged[k] = v
return merged
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
if json_ld.get('url'):
info_dict.update({
'title': video_title or info_dict['title'],
'description': video_description,
'thumbnail': video_thumbnail,
'age_limit': age_limit
})
info_dict.update(json_ld)
return info_dict
return merge_dicts(json_ld, info_dict)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
@ -2713,9 +2728,7 @@ class GenericIE(InfoExtractor):
if jwplayer_data:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
if not info.get('title'):
info['title'] = video_title
return info
return merge_dicts(info, info_dict)
def check_video(vurl):
if YoutubeIE.suitable(vurl):

View File

@ -0,0 +1,43 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
class TastyTradeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
'info_dict': {
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
'ext': 'mp4',
'title': 'A History of Teaming',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
'duration': 422.255,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex(
r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
webpage, 'ooyala code', group='code')
info = self._search_json_ld(webpage, display_id, fatal=False)
info.update({
'_type': 'url_transparent',
'ie_key': OoyalaIE.ie_key(),
'url': 'ooyala:%s' % ooyala_code,
'display_id': display_id,
})
return info