Merge branch 'master' into BlenderCloud-issue-13282

2017-06-30 19:28:10 -05:00 · 2017-06-30 19:28:10 -05:00 · 6a31a87f38
commit 6a31a87f38
parent 9cf9cb22bd 54faac2235
5 changed files with 86 additions and 22 deletions
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@ -36,7 +36,7 @@ class BBCCoUkIE(InfoExtractor):
                        (?:
                            programmes/(?!articles/)|
                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
-                            music/clips[/#]|
+                            music/(?:clips|audiovideo/popular)[/#]|
                            radio/player/
                        )
                        (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@ -229,8 +229,10 @@ class BBCCoUkIE(InfoExtractor):
        }, {
            'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
            'only_matching': True,
-        }
+        }, {
-    ]
+            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
            'only_matching': True,
        }]
    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@ -523,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):
        webpage = self._download_webpage(url, group_id, 'Downloading video page')
        error = self._search_regex(
            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)
        programme_id = None
        duration = None
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1002,17 +1002,17 @@ class InfoExtractor(object):
                item_type = e.get('@type')
                if expected_type is not None and expected_type != item_type:
                    return info
-                if item_type == 'TVEpisode':
+                if item_type in ('TVEpisode', 'Episode'):
                    info.update({
                        'episode': unescapeHTML(e.get('name')),
                        'episode_number': int_or_none(e.get('episodeNumber')),
                        'description': unescapeHTML(e.get('description')),
                    })
                    part_of_season = e.get('partOfSeason')
-                    if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+                    if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
                        info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                    part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
-                    if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+                    if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                        info['series'] = unescapeHTML(part_of_series.get('name'))
                elif item_type == 'Article':
                    info.update({
@ -1022,10 +1022,10 @@ class InfoExtractor(object):
                    })
                elif item_type == 'VideoObject':
                    extract_video_object(e)
-                elif item_type == 'WebPage':
+                    continue
-                    video = e.get('video')
+                video = e.get('video')
-                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                if isinstance(video, dict) and video.get('@type') == 'VideoObject':
-                        extract_video_object(video)
+                    extract_video_object(video)
                break
        return dict((k, v) for k, v in info.items() if v is not None)
--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
        webpage = self._download_webpage(url, display_id)
        info_url = self._search_regex(
-            r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
+            r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
            webpage, 'video id')
        title = remove_end(self._og_search_title(webpage), ' | Dplay')
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -2048,6 +2048,13 @@ class GenericIE(InfoExtractor):
        video_description = self._og_search_description(webpage, default=None)
        video_thumbnail = self._og_search_thumbnail(webpage, default=None)
        info_dict.update({
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'age_limit': age_limit,
        })
        # Look for Brightcove Legacy Studio embeds
        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
        if bc_urls:
@ -2684,18 +2691,26 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
        def merge_dicts(dict1, dict2):
            """Return a new dict combining dict1 and dict2.

            Entries whose value is None are dropped from both inputs.
            Values from dict1 win, except that an empty string in dict1
            is replaced by a non-empty string found under the same key
            in dict2.
            """
            combined = dict(
                (key, value) for key, value in dict1.items()
                if value is not None)
            for key, candidate in dict2.items():
                if candidate is None:
                    continue
                if key in combined:
                    current = combined[key]
                    # An existing value is only overridden when it is an
                    # empty string and the candidate is a non-empty string.
                    fills_blank = (
                        isinstance(candidate, compat_str) and candidate and
                        isinstance(current, compat_str) and not current)
                    if not fills_blank:
                        continue
                combined[key] = candidate
            return combined
        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(
            webpage, video_id, default={}, expected_type='VideoObject')
        if json_ld.get('url'):
-            info_dict.update({
+            return merge_dicts(json_ld, info_dict)
                'title': video_title or info_dict['title'],
                'description': video_description,
                'thumbnail': video_thumbnail,
                'age_limit': age_limit
            })
            info_dict.update(json_ld)
            return info_dict
        # Look for HTML5 media
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
@ -2713,9 +2728,7 @@ class GenericIE(InfoExtractor):
        if jwplayer_data:
            info = self._parse_jwplayer_data(
                jwplayer_data, video_id, require_title=False, base_url=url)
-            if not info.get('title'):
+            return merge_dicts(info, info_dict)
                info['title'] = video_title
            return info
        def check_video(vurl):
            if YoutubeIE.suitable(vurl):
--- a/youtube_dl/extractor/tastytrade.py
+++ b/youtube_dl/extractor/tastytrade.py
@ -0,0 +1,43 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 class TastyTradeIE(InfoExtractor):
    """Extractor for tastytrade.com show episode pages.

    The page is searched for a ``data-media-id`` attribute; its value is
    handed over to the Ooyala extractor as a ``url_transparent`` result,
    together with whatever metadata the page's JSON-LD provides.
    """

    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'

    _TESTS = [
        {
            'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
            'info_dict': {
                'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
                'ext': 'mp4',
                'title': 'A History of Teaming',
                'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
                'duration': 422.255,
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': ['Ooyala'],
        },
        {
            'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build a url_transparent result pointing at the embedded Ooyala media."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The quote character is captured first so the value can be matched
        # up to the same closing quote.
        ooyala_code = self._search_regex(
            r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
            webpage, 'ooyala code', group='code')

        # JSON-LD lookup is non-fatal; the fields below are written on top
        # of whatever it returned.
        result = self._search_json_ld(webpage, display_id, fatal=False)
        result['_type'] = 'url_transparent'
        result['ie_key'] = OoyalaIE.ie_key()
        result['url'] = 'ooyala:%s' % ooyala_code
        result['display_id'] = display_id
        return result