Merge remote-tracking branch 'origin/master' into paj/sbs-news-without-id

2019-06-04 19:42:02 +10:00 · 2019-06-04 19:42:02 +10:00 · 7f98919051
commit 7f98919051
parent e355d11e13 c94c121a99
5 changed files with 166 additions and 138 deletions
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@ -82,6 +82,10 @@ class LiveLeakIE(InfoExtractor):
    }, {
        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
        'only_matching': True,
    }, {
        # No original video
        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
        'only_matching': True,
    }]
    @staticmethod
@ -134,11 +138,13 @@ class LiveLeakIE(InfoExtractor):
                orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
                if a_format['url'] != orig_url:
                    format_id = a_format.get('format_id')
-                    formats.append({
+                    format_id = 'original' + ('-' + format_id if format_id else '')
-                        'format_id': 'original' + ('-' + format_id if format_id else ''),
+                    if self._is_valid_url(orig_url, video_id, format_id):
-                        'url': orig_url,
+                        formats.append({
-                        'preference': 1,
+                            'format_id': format_id,
-                    })
+                            'url': orig_url,
                            'preference': 1,
                        })
            self._sort_formats(formats)
            info_dict['formats'] = formats
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@ -16,6 +16,11 @@ from ..utils import (
 class ProSiebenSat1BaseIE(InfoExtractor):
    _GEO_COUNTRIES = ['DE']
    _ACCESS_ID = None
    _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
    _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
    def _extract_video_info(self, url, clip_id):
        client_location = url
@ -31,93 +36,128 @@ class ProSiebenSat1BaseIE(InfoExtractor):
        if video.get('is_protected') is True:
            raise ExtractorError('This video is DRM protected.', expected=True)
        duration = float_or_none(video.get('duration'))
        source_ids = [compat_str(source['id']) for source in video['sources']]
        client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
        sources = self._download_json(
            'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
            clip_id, 'Downloading sources JSON', query={
                'access_token': self._TOKEN,
                'client_id': client_id,
                'client_location': client_location,
                'client_name': self._CLIENT_NAME,
            })
        server_id = sources['server_id']
        def fix_bitrate(bitrate):
            bitrate = int_or_none(bitrate)
            if not bitrate:
                return None
            return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
        formats = []
-        for source_id in source_ids:
+        if self._ACCESS_ID:
-            client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+            raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
-            urls = self._download_json(
+            server_token = (self._download_json(
-                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+                self._V4_BASE_URL + 'protocols', clip_id,
-                clip_id, 'Downloading urls JSON', fatal=False, query={
+                'Downloading protocols JSON',
                headers=self.geo_verification_headers(), query={
                    'access_id': self._ACCESS_ID,
                    'client_token': sha1((raw_ct).encode()).hexdigest(),
                    'video_id': clip_id,
                }, fatal=False) or {}).get('server_token')
            if server_token:
                urls = (self._download_json(
                    self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
                        'access_id': self._ACCESS_ID,
                        'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
                        'protocols': self._SUPPORTED_PROTOCOLS,
                        'server_token': server_token,
                        'video_id': clip_id,
                    }, fatal=False) or {}).get('urls') or {}
                for protocol, variant in urls.items():
                    source_url = variant.get('clear', {}).get('url')
                    if not source_url:
                        continue
                    if protocol == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id=protocol, fatal=False))
                    elif protocol == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id=protocol, fatal=False))
                    else:
                        formats.append({
                            'url': source_url,
                            'format_id': protocol,
                        })
        if not formats:
            source_ids = [compat_str(source['id']) for source in video['sources']]
            client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
            sources = self._download_json(
                'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
                clip_id, 'Downloading sources JSON', query={
                    'access_token': self._TOKEN,
                    'client_id': client_id,
                    'client_location': client_location,
                    'client_name': self._CLIENT_NAME,
                    'server_id': server_id,
                    'source_ids': source_id,
                })
-            if not urls:
+            server_id = sources['server_id']
-                continue
+
-            if urls.get('status_code') != 0:
+            def fix_bitrate(bitrate):
-                raise ExtractorError('This video is unavailable', expected=True)
+                bitrate = int_or_none(bitrate)
-            urls_sources = urls['sources']
+                if not bitrate:
-            if isinstance(urls_sources, dict):
+                    return None
-                urls_sources = urls_sources.values()
+                return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
-            for source in urls_sources:
+
-                source_url = source.get('url')
+            for source_id in source_ids:
-                if not source_url:
+                client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
                urls = self._download_json(
                    'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
                    clip_id, 'Downloading urls JSON', fatal=False, query={
                        'access_token': self._TOKEN,
                        'client_id': client_id,
                        'client_location': client_location,
                        'client_name': self._CLIENT_NAME,
                        'server_id': server_id,
                        'source_ids': source_id,
                    })
                if not urls:
                    continue
-                protocol = source.get('protocol')
+                if urls.get('status_code') != 0:
-                mimetype = source.get('mimetype')
+                    raise ExtractorError('This video is unavailable', expected=True)
-                if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+                urls_sources = urls['sources']
-                    formats.extend(self._extract_f4m_formats(
+                if isinstance(urls_sources, dict):
-                        source_url, clip_id, f4m_id='hds', fatal=False))
+                    urls_sources = urls_sources.values()
-                elif mimetype == 'application/x-mpegURL':
+                for source in urls_sources:
-                    formats.extend(self._extract_m3u8_formats(
+                    source_url = source.get('url')
-                        source_url, clip_id, 'mp4', 'm3u8_native',
+                    if not source_url:
-                        m3u8_id='hls', fatal=False))
+                        continue
-                elif mimetype == 'application/dash+xml':
+                    protocol = source.get('protocol')
-                    formats.extend(self._extract_mpd_formats(
+                    mimetype = source.get('mimetype')
-                        source_url, clip_id, mpd_id='dash', fatal=False))
+                    if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
-                else:
+                        formats.extend(self._extract_f4m_formats(
-                    tbr = fix_bitrate(source['bitrate'])
+                            source_url, clip_id, f4m_id='hds', fatal=False))
-                    if protocol in ('rtmp', 'rtmpe'):
+                    elif mimetype == 'application/x-mpegURL':
-                        mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+                        formats.extend(self._extract_m3u8_formats(
-                        if not mobj:
+                            source_url, clip_id, 'mp4', 'm3u8_native',
-                            continue
+                            m3u8_id='hls', fatal=False))
-                        path = mobj.group('path')
+                    elif mimetype == 'application/dash+xml':
-                        mp4colon_index = path.rfind('mp4:')
+                        formats.extend(self._extract_mpd_formats(
-                        app = path[:mp4colon_index]
+                            source_url, clip_id, mpd_id='dash', fatal=False))
                        play_path = path[mp4colon_index:]
                        formats.append({
                            'url': '%s/%s' % (mobj.group('url'), app),
                            'app': app,
                            'play_path': play_path,
                            'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                            'page_url': 'http://www.prosieben.de',
                            'tbr': tbr,
                            'ext': 'flv',
                            'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
                        })
                    else:
-                        formats.append({
+                        tbr = fix_bitrate(source['bitrate'])
-                            'url': source_url,
+                        if protocol in ('rtmp', 'rtmpe'):
-                            'tbr': tbr,
+                            mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
-                            'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+                            if not mobj:
-                        })
+                                continue
                            path = mobj.group('path')
                            mp4colon_index = path.rfind('mp4:')
                            app = path[:mp4colon_index]
                            play_path = path[mp4colon_index:]
                            formats.append({
                                'url': '%s/%s' % (mobj.group('url'), app),
                                'app': app,
                                'play_path': play_path,
                                'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                                'page_url': 'http://www.prosieben.de',
                                'tbr': tbr,
                                'ext': 'flv',
                                'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
                            })
                        else:
                            formats.append({
                                'url': source_url,
                                'tbr': tbr,
                                'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
                            })
        self._sort_formats(formats)
        return {
-            'duration': duration,
+            'duration': float_or_none(video.get('duration')),
            'formats': formats,
        }
@ -344,6 +384,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
    _TOKEN = 'prosieben'
    _SALT = '01!8d8F_)r9]4s[qeuXfP%'
    _CLIENT_NAME = 'kolibri-2.0.19-splec4'
    _ACCESS_ID = 'x_prosiebenmaxx-de'
    _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
    _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
--- a/youtube_dl/extractor/rtp.py
+++ b/youtube_dl/extractor/rtp.py
@ -1,9 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    js_to_json,
 )
 class RTPIE(InfoExtractor):
@ -18,10 +20,6 @@ class RTPIE(InfoExtractor):
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
@ -33,57 +31,36 @@ class RTPIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)
-        player_config = self._search_regex(
+        config = self._parse_json(self._search_regex(
-            r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
+            r'(?s)RTPPlayer\(({.+?})\);', webpage,
-        config = self._parse_json(player_config, video_id)
+            'player config'), video_id, js_to_json)
-
+        file_url = config['file']
-        path, ext = config.get('file').rsplit('.', 1)
+        ext = determine_ext(file_url)
-        formats = [{
+        if ext == 'm3u8':
-            'format_id': 'rtmp',
+            file_key = config.get('fileKey')
-            'ext': ext,
+            formats = self._extract_m3u8_formats(
-            'vcodec': config.get('type') == 'audio' and 'none' or None,
+                file_url, video_id, 'mp4', 'm3u8_native',
-            'preference': -2,
+                m3u8_id='hls', fatal=file_key)
-            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
+            if file_key:
-            'app': config.get('application'),
+                formats.append({
-            'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
+                    'url': 'https://cdn-ondemand.rtp.pt' + file_key,
-            'page_url': url,
+                    'preference': 1,
-            'rtmp_live': config.get('live', False),
+                })
-            'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+            self._sort_formats(formats)
-            'rtmp_real_time': True,
+        else:
-        }]
+            formats = [{
-
+                'url': file_url,
-        # Construct regular HTTP download URLs
+                'ext': ext,
-        replacements = {
+            }]
-            'audio': {
+        if config.get('mediaType') == 'audio':
-                'format_id': 'mp3',
+            for f in formats:
-                'pattern': r'^nas2\.share/wavrss/',
+                f['vcodec'] = 'none'
                'repl': 'http://rsspod.rtp.pt/podcasts/',
                'vcodec': 'none',
            },
            'video': {
                'format_id': 'mp4_h264',
                'pattern': r'^nas2\.share/h264/',
                'repl': 'http://rsspod.rtp.pt/videocasts/',
                'vcodec': 'h264',
            },
        }
        r = replacements[config['type']]
        if re.match(r['pattern'], config['file']) is not None:
            formats.append({
                'format_id': r['format_id'],
                'url': re.sub(r['pattern'], r['repl'], config['file']),
                'vcodec': r['vcodec'],
            })
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
-            'description': description,
+            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
-            'thumbnail': thumbnail,
+            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
        }
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@ -21,7 +21,7 @@ from ..utils import (
 class VikiBaseIE(InfoExtractor):
    _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
    _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
-    _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
+    _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
    _APP = '100005a'
    _APP_VERSION = '2.2.5.1428709186'
@ -377,7 +377,7 @@ class VikiChannelIE(VikiBaseIE):
                for video in page['response']:
                    video_id = video['id']
                    entries.append(self.url_result(
-                        'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+                        'https://www.viki.com/videos/%s' % video_id, 'Viki'))
                if not page['pagination']['next']:
                    break
--- a/youtube_dl/extractor/vrv.py
+++ b/youtube_dl/extractor/vrv.py
@ -130,7 +130,7 @@ class VRVIE(VRVBaseIE):
        self._TOKEN_SECRET = token_credentials['oauth_token_secret']
    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
-        if not url or stream_format not in ('hls', 'dash'):
+        if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
            return []
        stream_id_list = []
        if audio_lang:
@ -140,7 +140,7 @@ class VRVIE(VRVBaseIE):
        format_id = stream_format
        if stream_id_list:
            format_id += '-' + '-'.join(stream_id_list)
-        if stream_format == 'hls':
+        if 'hls' in stream_format:
            adaptive_formats = self._extract_m3u8_formats(
                url, video_id, 'mp4', m3u8_id=format_id,
                note='Downloading %s information' % format_id,