Merge branch 'master' into BlenderCloud-issue-13282

2017-06-30 19:28:10 -05:00 · 2017-06-30 19:28:10 -05:00 · 6a31a87f38
commit 6a31a87f38
parent 9cf9cb22bd 54faac2235
5 changed files with 86 additions and 22 deletions
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@ -36,7 +36,7 @@ class BBCCoUkIE(InfoExtractor):
                        (?:
                            programmes/(?!articles/)|
                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
-                            music/clips[/#]|
+                            music/(?:clips|audiovideo/popular)[/#]|
                            radio/player/
                        )
                        (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@ -229,8 +229,10 @@ class BBCCoUkIE(InfoExtractor):
        }, {
            'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
            'only_matching': True,
-        }
-    ]
+        }, {
+            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+            'only_matching': True,
+        }]

    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'

@ -523,6 +525,12 @@ class BBCCoUkIE(InfoExtractor):

        webpage = self._download_webpage(url, group_id, 'Downloading video page')

+        error = self._search_regex(
+            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+            webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
        programme_id = None
        duration = None

--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1002,17 +1002,17 @@ class InfoExtractor(object):
                item_type = e.get('@type')
                if expected_type is not None and expected_type != item_type:
                    return info
-                if item_type == 'TVEpisode':
+                if item_type in ('TVEpisode', 'Episode'):
                    info.update({
                        'episode': unescapeHTML(e.get('name')),
                        'episode_number': int_or_none(e.get('episodeNumber')),
                        'description': unescapeHTML(e.get('description')),
                    })
                    part_of_season = e.get('partOfSeason')
-                    if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+                    if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
                        info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                    part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
-                    if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+                    if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                        info['series'] = unescapeHTML(part_of_series.get('name'))
                elif item_type == 'Article':
                    info.update({
@ -1022,10 +1022,10 @@ class InfoExtractor(object):
                    })
                elif item_type == 'VideoObject':
                    extract_video_object(e)
-                elif item_type == 'WebPage':
-                    video = e.get('video')
-                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
-                        extract_video_object(video)
+                    continue
+                video = e.get('video')
+                if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                    extract_video_object(video)
                break
        return dict((k, v) for k, v in info.items() if v is not None)

--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@ -184,7 +184,7 @@ class DPlayItIE(InfoExtractor):
        webpage = self._download_webpage(url, display_id)

        info_url = self._search_regex(
-            r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
+            r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
            webpage, 'video id')

        title = remove_end(self._og_search_title(webpage), ' | Dplay')
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -2048,6 +2048,13 @@ class GenericIE(InfoExtractor):
        video_description = self._og_search_description(webpage, default=None)
        video_thumbnail = self._og_search_thumbnail(webpage, default=None)

+        info_dict.update({
+            'title': video_title,
+            'description': video_description,
+            'thumbnail': video_thumbnail,
+            'age_limit': age_limit,
+        })
+
        # Look for Brightcove Legacy Studio embeds
        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
        if bc_urls:
@ -2684,18 +2691,26 @@ class GenericIE(InfoExtractor):
            return self.playlist_from_matches(
                mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())

+        def merge_dicts(dict1, dict2):
+            merged = {}
+            for k, v in dict1.items():
+                if v is not None:
+                    merged[k] = v
+            for k, v in dict2.items():
+                if v is None:
+                    continue
+                if (k not in merged or
+                        (isinstance(v, compat_str) and v and
+                            isinstance(merged[k], compat_str) and
+                            not merged[k])):
+                    merged[k] = v
+            return merged
+
        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(
            webpage, video_id, default={}, expected_type='VideoObject')
        if json_ld.get('url'):
-            info_dict.update({
-                'title': video_title or info_dict['title'],
-                'description': video_description,
-                'thumbnail': video_thumbnail,
-                'age_limit': age_limit
-            })
-            info_dict.update(json_ld)
-            return info_dict
+            return merge_dicts(json_ld, info_dict)

        # Look for HTML5 media
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
@ -2713,9 +2728,7 @@ class GenericIE(InfoExtractor):
        if jwplayer_data:
            info = self._parse_jwplayer_data(
                jwplayer_data, video_id, require_title=False, base_url=url)
-            if not info.get('title'):
-                info['title'] = video_title
-            return info
+            return merge_dicts(info, info_dict)

        def check_video(vurl):
            if YoutubeIE.suitable(vurl):
--- a/youtube_dl/extractor/tastytrade.py
+++ b/youtube_dl/extractor/tastytrade.py
@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TastyTradeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
+
+    _TESTS = [{
+        'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
+        'info_dict': {
+            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+            'ext': 'mp4',
+            'title': 'A History of Teaming',
+            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+            'duration': 422.255,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Ooyala'],
+    }, {
+        'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        ooyala_code = self._search_regex(
+            r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
+            webpage, 'ooyala code', group='code')
+
+        info = self._search_json_ld(webpage, display_id, fatal=False)
+        info.update({
+            '_type': 'url_transparent',
+            'ie_key': OoyalaIE.ie_key(),
+            'url': 'ooyala:%s' % ooyala_code,
+            'display_id': display_id,
+        })
+        return info