Merge remote-tracking branch 'upstream/master'

# Conflicts:
#	youtube_dl/extractor/facebook.py
2019-07-13 22:55:40 +03:00 · 2019-07-13 22:55:40 +03:00 · d08fed0d41
commit d08fed0d41
parent 25df555679 d89a0a8026
14 changed files with 288 additions and 160 deletions
--- a/youtube_dl/extractor/biobiochiletv.py
+++ b/youtube_dl/extractor/biobiochiletv.py
@ -6,7 +6,6 @@ from ..utils import (
    ExtractorError,
    remove_end,
 )
 from .rudo import RudoIE
 class BioBioChileTVIE(InfoExtractor):
@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
    }, {
        'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
        'info_dict': {
-            'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
+            'id': 'b4xd0LK3SK',
            'ext': 'mp4',
-            'uploader': '(none)',
+            # TODO: fix url_transparent information overriding
-            'upload_date': '20160708',
+            # 'uploader': 'Juan Pablo Echenique',
-            'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
+            'title': 'Comentario Oscar Cáceres',
        },
        'params': {
            # empty m3u8 manifest
            'skip_download': True,
        },
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
-        rudo_url = RudoIE._extract_url(webpage)
+        rudo_url = self._search_regex(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
            webpage, 'embed URL', None, group='url')
        if not rudo_url:
            raise ExtractorError('No videos found')
@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._html_search_regex(
-            r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
+            r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)
        return {
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dl/extractor/bleacherreport.py
@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor):
        video = article_data.get('video')
        if video:
            video_type = video['type']
-            if video_type == 'cms.bleacherreport.com':
+            if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
                info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
            elif video_type == 'ooyala.com':
                info['url'] = 'ooyala:%s' % video['id']
@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor):
 class BleacherReportCMSIE(AMPIE):
-    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
+    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
    _TESTS = [{
-        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
+        info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
        info['id'] = video_id
        return info
--- a/youtube_dl/extractor/dbtv.py
+++ b/youtube_dl/extractor/dbtv.py
@ -7,50 +7,51 @@ from .common import InfoExtractor
 class DBTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
+    _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
    _TESTS = [{
-        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+        'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
-        'md5': '2e24f67936517b143a234b4cadf792ec',
+        'md5': 'b8f850ba1860adbda668d367f9b77699',
        'info_dict': {
-            'id': '3649835190001',
+            'id': 'PynxJnNWChE',
            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
            'ext': 'mp4',
            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
-            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
+            'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
            'thumbnail': r're:https?://.*\.jpg',
-            'timestamp': 1404039863,
+            'upload_date': '20160916',
-            'upload_date': '20140629',
+            'duration': 69,
-            'duration': 69.544,
+            'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
-            'uploader_id': '1027729757001',
+            'uploader': 'Dagbladet',
        },
-        'add_ie': ['BrightcoveNew']
+        'add_ie': ['Youtube']
    }, {
-        'url': 'http://dbtv.no/3649835190001',
+        'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
        'only_matching': True,
    }, {
-        'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
+        'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
        'only_matching': True,
    }, {
        'url': 'http://dbtv.no/vice/5000634109001',
        'only_matching': True,
    }, {
        'url': 'http://dbtv.no/filmtrailer/3359293614001',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_urls(webpage):
        return [url for _, url in re.findall(
-            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
+            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
            webpage)]
    def _real_extract(self, url):
-        video_id, display_id = re.match(self._VALID_URL, url).groups()
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
-
+        info = {
        return {
            '_type': 'url_transparent',
            'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
            'id': video_id,
            'display_id': display_id,
            'ie_key': 'BrightcoveNew',
        }
        if len(video_id) == 11:
            info.update({
                'url': video_id,
                'ie_key': 'Youtube',
            })
        else:
            info.update({
                'url': 'jwplatform:' + video_id,
                'ie_key': 'JWPlatform',
            })
        return info
--- a/youtube_dl/extractor/dlive.py
+++ b/youtube_dl/extractor/dlive.py
@ -0,0 +1,94 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import int_or_none
 class DLiveVODIE(InfoExtractor):
    # Handles dlive.tv past-broadcast pages, i.e. URLs of the form
    # /p/<uploader_id>+<id>.
    IE_NAME = 'dlive:vod'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
        'info_dict': {
            'id': '3mTzOl4WR',
            'ext': 'mp4',
            'title': 'Minecraft with james charles epic',
            'upload_date': '20190701',
            'timestamp': 1562011015,
            'uploader_id': 'pdp',
        }
    }

    def _real_extract(self, url):
        # Both halves of the permlink are taken straight from the URL.
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('uploader_id')
        vod_id = mobj.group('id')

        # GraphQL document requesting the metadata of one past broadcast.
        query = '''query {
  pastBroadcast(permlink:"%s+%s") {
    content
    createdAt
    length
    playbackUrl
    title
    thumbnailUrl
    viewCount
  }
 }''' % (uploader_id, vod_id)
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', vod_id,
            data=json.dumps({'query': query}).encode())
        broadcast = response['data']['pastBroadcast']

        # Title and playback URL are looked up by key, so a response that
        # lacks either one raises instead of yielding a partial result.
        title = broadcast['title']
        playback_url = broadcast['playbackUrl']

        formats = self._extract_m3u8_formats(
            playback_url, vod_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        # Optional fields: absent keys simply come back as None.
        description = broadcast.get('content')
        thumbnail = broadcast.get('thumbnailUrl')
        # NOTE(review): 1000 is passed as int_or_none's second argument,
        # presumably scaling milliseconds to seconds - confirm.
        timestamp = int_or_none(broadcast.get('createdAt'), 1000)
        view_count = int_or_none(broadcast.get('viewCount'))

        return {
            'id': vod_id,
            'title': title,
            'uploader_id': uploader_id,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'view_count': view_count,
        }
 class DLiveStreamIE(InfoExtractor):
    # Handles dlive.tv channel pages (any first path segment other than
    # "p/") and reports the channel's stream as a live entry.
    IE_NAME = 'dlive:stream'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'

    def _real_extract(self, url):
        display_name = self._match_id(url)

        # GraphQL document resolving the display name to the account's
        # username plus the metadata of its livestream.
        query = '''query {
  userByDisplayName(displayname:"%s") {
    livestream {
      content
      createdAt
      title
      thumbnailUrl
      watchingCount
    }
    username
  }
 }''' % display_name
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', display_name,
            data=json.dumps({'query': query}).encode())
        user = response['data']['userByDisplayName']

        # Mandatory fields, read in this order so that failures surface
        # exactly where they did before.
        livestream = user['livestream']
        title = livestream['title']
        username = user['username']

        # The HLS manifest is addressed by username, not by display name.
        manifest_url = 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username
        formats = self._extract_m3u8_formats(manifest_url, display_name, 'mp4')
        self._sort_formats(formats)

        result = {
            'id': display_name,
            'title': self._live_title(title),
            'uploader': display_name,
            'uploader_id': username,
            'formats': formats,
            'is_live': True,
        }
        # Optional metadata; missing keys end up as None.
        result['description'] = livestream.get('content')
        result['thumbnail'] = livestream.get('thumbnailUrl')
        # NOTE(review): 1000 is passed as int_or_none's second argument,
        # presumably scaling milliseconds to seconds - confirm.
        result['timestamp'] = int_or_none(livestream.get('createdAt'), 1000)
        result['view_count'] = int_or_none(livestream.get('watchingCount'))
        return result
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -579,6 +579,7 @@ from .linkedin import (
 )
 from .linuxacademy import LinuxAcademyIE
 from .litv import LiTVIE
 from .livejournal import LiveJournalIE
 from .liveleak import (
    LiveLeakIE,
    LiveLeakEmbedIE,
@ -967,7 +968,6 @@ from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
 from .rudo import RudoIE
 from .ruhd import RUHDIE
 from .rutube import (
    RutubeIE,
@ -1255,6 +1255,10 @@ from .udn import UDNEmbedIE
 from .ufctv import UFCTVIE
 from .uktvplay import UKTVPlayIE
 from .digiteka import DigitekaIE
 from .dlive import (
    DLiveVODIE,
    DLiveStreamIE,
 )
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -462,8 +462,8 @@ class FacebookIE(InfoExtractor):
            r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
            'uploader_id', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
        if is_live:
            view_count = parse_count(
                self._search_regex(r'viewerCount:([\d]+)', webpage, 'views', fatal=False) or \
--- a/youtube_dl/extractor/livejournal.py
+++ b/youtube_dl/extractor/livejournal.py
@ -0,0 +1,42 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import int_or_none
 class LiveJournalIE(InfoExtractor):
    # Matches album video pages on livejournal.com (optionally on a
    # subdomain); the numeric "id" query parameter is the video id.
    _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
    _TEST = {
        'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
        'md5': 'adaf018388572ced8a6f301ace49d4b2',
        'info_dict': {
            'id': '1263729',
            'ext': 'mp4',
            'title': 'Истребители против БПЛА',
            'upload_date': '20190624',
            'timestamp': 1561406715,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page carries its state as a JS object assigned to Site.page;
        # the video record sits under video -> record.
        page_json = self._search_regex(
            r'Site\.page\s*=\s*({.+?});', webpage,
            'page data')
        page_data = self._parse_json(page_json, video_id)
        record = page_data['video']['record']

        # Mandatory: the storage id forms the tail of the delegated URL.
        storage_id = compat_str(record['storageid'])

        name = record.get('name')
        # remove filename extension(.mp4, .mov, etc...); an empty or
        # missing name is passed through unchanged
        title = name.rsplit('.', 1)[0] if name else name

        thumbnail = record.get('thumbnail')
        timestamp = int_or_none(record.get('timecreate'))

        # Hand the actual extraction over to the EaglePlatform extractor
        # through a url_transparent result.
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id,
            'ie_key': 'EaglePlatform',
        }
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE):
    }, {
        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
        'only_matching': True,
    }, {
        # Status="NotFound", Message="Transcript not found"
        'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
        'only_matching': True,
    }]
    def _raise_unavailable(self, video_id):
@ -247,11 +251,16 @@ class LyndaIE(LyndaBaseIE):
    def _get_subtitles(self, video_id):
        url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
-        subs = self._download_json(url, None, False)
+        subs = self._download_webpage(
            url, video_id, 'Downloading subtitles JSON', fatal=False)
        if not subs or 'Status="NotFound"' in subs:
            return {}
        subs = self._parse_json(subs, video_id, fatal=False)
        if not subs:
            return {}
        fixed_subs = self._fix_subtitles(subs)
        if fixed_subs:
            return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
        else:
        return {}
--- a/youtube_dl/extractor/roosterteeth.py
+++ b/youtube_dl/extractor/roosterteeth.py
@ -4,11 +4,14 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_HTTPError,
    compat_str,
 )
 from ..utils import (
    ExtractorError,
    int_or_none,
-    strip_or_none,
+    str_or_none,
    unescapeHTML,
    urlencode_postdata,
 )
@ -21,15 +24,14 @@ class RoosterTeethIE(InfoExtractor):
        'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
        'md5': 'e2bd7764732d785ef797700a2489f212',
        'info_dict': {
-            'id': '26576',
+            'id': '9156',
            'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
            'ext': 'mp4',
-            'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
+            'title': 'Million Dollars, But... The Game Announcement',
-            'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
+            'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
            'thumbnail': r're:^https?://.*\.png$',
            'series': 'Million Dollars, But...',
            'episode': 'Million Dollars, But... The Game Announcement',
            'comment_count': int,
        },
    }, {
        'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
@ -89,60 +91,55 @@ class RoosterTeethIE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
        api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id
-        webpage = self._download_webpage(url, display_id)
+        try:
-
+            m3u8_url = self._download_json(
-        episode = strip_or_none(unescapeHTML(self._search_regex(
+                api_episode_url + '/videos', display_id,
-            (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+                'Downloading video JSON metadata')['data'][0]['attributes']['url']
-             r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
+        except ExtractorError as e:
-            default=None, group='title')))
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-
+                if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
        title = strip_or_none(self._og_search_title(
            webpage, default=None)) or episode
        m3u8_url = self._search_regex(
            r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
            webpage, 'm3u8 url', default=None, group='url')
        if not m3u8_url:
            if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
                    self.raise_login_required(
                        '%s is only available for FIRST members' % display_id)
-
+            raise
            if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
                self.raise_login_required('%s is not available yet' % display_id)
            raise ExtractorError('Unable to extract m3u8 URL')
        formats = self._extract_m3u8_formats(
-            m3u8_url, display_id, ext='mp4',
+            m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)
-        description = strip_or_none(self._og_search_description(webpage))
+        episode = self._download_json(
-        thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
+            api_episode_url, display_id,
            'Downloading episode JSON metadata')['data'][0]
        attributes = episode['attributes']
        title = attributes.get('title') or attributes['display_title']
        video_id = compat_str(episode['id'])
-        series = self._search_regex(
+        thumbnails = []
-            (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
+        for image in episode.get('included', {}).get('images', []):
-            webpage, 'series', fatal=False)
+            if image.get('type') == 'episode_image':
-
+                img_attributes = image.get('attributes') or {}
-        comment_count = int_or_none(self._search_regex(
+                for k in ('thumb', 'small', 'medium', 'large'):
-            r'>Comments \((\d+)\)<', webpage,
+                    img_url = img_attributes.get(k)
-            'comment count', fatal=False))
+                    if img_url:
-
+                        thumbnails.append({
-        video_id = self._search_regex(
+                            'id': k,
-            (r'containerId\s*=\s*["\']episode-(\d+)\1',
+                            'url': img_url,
-             r'<div[^<]+id=["\']episode-(\d+)'), webpage,
+                        })
            'video id', default=display_id)
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': description,
+            'description': attributes.get('description') or attributes.get('caption'),
-            'thumbnail': thumbnail,
+            'thumbnails': thumbnails,
-            'series': series,
+            'series': attributes.get('show_title'),
-            'episode': episode,
+            'season_number': int_or_none(attributes.get('season_number')),
-            'comment_count': comment_count,
+            'season_id': attributes.get('season_id'),
            'episode': title,
            'episode_number': int_or_none(attributes.get('number')),
            'episode_id': str_or_none(episode.get('uuid')),
            'formats': formats,
            'channel_id': attributes.get('channel_id'),
            'duration': int_or_none(attributes.get('length')),
        }
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@ -1,53 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    js_to_json,
    get_element_by_class,
    unified_strdate,
 )
 class RudoIE(InfoExtractor):
    # Handles rudo.video VOD pages (/vod/<id>).
    _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://rudo.video/vod/oTzw0MGnyG',
        'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
        'info_dict': {
            'id': 'oTzw0MGnyG',
            'ext': 'mp4',
            'title': 'Comentario Tomás Mosciatti',
            'upload_date': '20160617',
        },
    }

    @classmethod
    def _extract_url(cls, webpage):
        # Return the src of the first rudo.video VOD iframe in webpage,
        # or None when the page embeds no such iframe.
        found = re.search(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
            webpage)
        return found.group('url') if found else None

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')

        def to_json(source):
            # Swap every encodeURI(...) call for an empty string literal
            # before running the JS object through js_to_json.
            return js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', source))

        # The player configuration is the object literal handed to
        # playerInstance.setup(...).
        setup_args = self._search_regex(
            r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data')
        jwplayer_data = self._parse_json(
            setup_args, video_id, transform_source=to_json)

        info_dict = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')

        # Title and upload date are taken from the page itself and
        # override whatever the player data provided.
        info_dict['title'] = self._og_search_title(webpage)
        info_dict['upload_date'] = unified_strdate(
            get_element_by_class('date', webpage))
        return info_dict
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    merge_dicts,
    orderedSet,
    parse_duration,
    parse_resolution,
@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor):
            'description': 'dillion harper masturbates on a bed',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'silly2587',
            'timestamp': 1422571989,
            'upload_date': '20150129',
            'age_limit': 18,
        }
    }, {
@ -106,31 +109,36 @@ class SpankBangIE(InfoExtractor):
            for format_id, format_url in stream.items():
                if format_id.startswith(STREAM_URL_PREFIX):
                    if format_url and isinstance(format_url, list):
                        format_url = format_url[0]
                    extract_format(
                        format_id[len(STREAM_URL_PREFIX):], format_url)
        self._sort_formats(formats)
        info = self._search_json_ld(webpage, video_id, default={})
        title = self._html_search_regex(
-            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
+            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
        description = self._search_regex(
            r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
-            webpage, 'description', fatal=False)
+            webpage, 'description', default=None)
-        thumbnail = self._og_search_thumbnail(webpage)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
-        uploader = self._search_regex(
+        uploader = self._html_search_regex(
-            r'class="user"[^>]*><img[^>]+>([^<]+)',
+            (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
             r'class="user"[^>]*><img[^>]+>([^<]+)'),
            webpage, 'uploader', default=None)
        duration = parse_duration(self._search_regex(
            r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
-            webpage, 'duration', fatal=False))
+            webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
-            r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
+            r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
        age_limit = self._rta_search(webpage)
-        return {
+        return merge_dicts({
            'id': video_id,
-            'title': title,
+            'title': title or video_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
@ -138,7 +146,8 @@ class SpankBangIE(InfoExtractor):
            'view_count': view_count,
            'formats': formats,
            'age_limit': age_limit,
-        }
+        }, info
        )
 class SpankBangPlaylistIE(InfoExtractor):
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor):
        'only_matching': True,
    }]
-    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
+    _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
    _GEO_COUNTRIES = ['US']
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@ -438,11 +438,22 @@ class TwitterIE(InfoExtractor):
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
    }, {
        'url': 'https://twitter.com/foobar/status/1087791357756956680',
        'info_dict': {
            'id': '1087791357756956680',
            'ext': 'mp4',
            'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:66d493500c013e3e2d434195746a7f78',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'duration': 61.567,
        },
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        user_id = mobj.group('user_id')
        twid = mobj.group('id')
        webpage, urlh = self._download_webpage_handle(
@ -451,8 +462,13 @@ class TwitterIE(InfoExtractor):
        if 'twitter.com/account/suspended' in urlh.geturl():
            raise ExtractorError('Account suspended by Twitter.', expected=True)
-        if user_id is None:
+        user_id = None
-            mobj = re.match(self._VALID_URL, urlh.geturl())
+
        redirect_mobj = re.match(self._VALID_URL, urlh.geturl())
        if redirect_mobj:
            user_id = redirect_mobj.group('user_id')
        if not user_id:
            user_id = mobj.group('user_id')
        username = remove_end(self._og_search_title(webpage), ' on Twitter')
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -371,10 +371,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:www\.)?hooktube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
                            (?:(?:www|dev)\.)?invidio\.us/|
-                            (?:www\.)?invidiou\.sh/|
+                            (?:(?:www|no)\.)?invidiou\.sh/|
-                            (?:www\.)?invidious\.snopyta\.org/|
+                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
                            (?:www\.)?invidious\.kabi\.tk/|
                            (?:www\.)?invidious\.enkirton\.net/|
                            (?:www\.)?invidious\.13ad\.de/|
                            (?:www\.)?tube\.poal\.co/|
                            (?:www\.)?vid\.wxzm\.sx/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls