Merge pull request #55 from ytdl-org/master

[pull] master from ytdl-org:master
2019-08-01 22:19:02 +00:00 · 2019-08-01 22:19:02 +00:00 · f660790905
commit f660790905
parent 44ec33a626 33b529fabd
3 changed files with 108 additions and 37 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000)
 view_count = int_or_none(video.get('views'))
 ```
 ### Inline values
 Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
 #### Example
 Correct:
 ```python
 title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
 ```
 Incorrect:
 ```python
 TITLE_RE = r'<title>([^<]+)</title>'
 # ...some lines of code...
 title = self._html_search_regex(TITLE_RE, webpage, 'title')
 ```
 ### Collapse fallbacks
 Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression by passing a list of meta names to a single call.
 #### Example
 Good:
 ```python
 description = self._html_search_meta(
    ['og:description', 'description', 'twitter:description'],
    webpage, 'description', default=None)
 ```
 Unwieldy:
 ```python
 description = (
    self._og_search_description(webpage, default=None)
    or self._html_search_meta('description', webpage, default=None)
    or self._html_search_meta('twitter:description', webpage, default=None))
 ```
 ### Trailing parentheses
 Always place the closing parenthesis directly after the last argument rather than on a line of its own.
 #### Example
 Correct:
 ```python
    lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
    list)
 ```
 Incorrect:
 ```python
    lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
    list,
 )
 ```
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -5,14 +5,8 @@ import re
 import string
 from .discoverygo import DiscoveryGoBaseIE
-from ..compat import (
+from ..compat import compat_urllib_parse_unquote
-    compat_str,
+from ..utils import ExtractorError
    compat_urllib_parse_unquote,
 )
 from ..utils import (
    ExtractorError,
    try_get,
 )
 from ..compat import compat_HTTPError
@@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
                    cookingchanneltv|
                    motortrend
                )
-        )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
+        )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
    _TESTS = [{
-        'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
+        'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
        'info_dict': {
-            'id': '5a2d9b4d6b66d17a5026e1fd',
+            'id': '5a2f35ce6b66d17a5026e29e',
            'ext': 'mp4',
-            'title': 'Dave Foley',
+            'title': 'Riding with Matthew Perry',
-            'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
+            'description': 'md5:a34333153e79bc4526019a5129e7f878',
-            'duration': 608,
+            'duration': 84,
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
@@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE):
    }]
    _GEO_COUNTRIES = ['US']
    _GEO_BYPASS = False
    _API_BASE_URL = 'https://api.discovery.com/v1/'
    def _real_extract(self, url):
-        site, path, display_id = re.match(self._VALID_URL, url).groups()
+        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        react_data = self._parse_json(self._search_regex(
            r'window\.__reactTransmitPacket\s*=\s*({.+?});',
            webpage, 'react data'), display_id)
        content_blocks = react_data['layout'][path]['contentBlocks']
        video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
        video_id = video['id']
        access_token = None
        cookies = self._get_cookies(url)
@@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE):
        if auth_storage_cookie and auth_storage_cookie.value:
            auth_storage = self._parse_json(compat_urllib_parse_unquote(
                compat_urllib_parse_unquote(auth_storage_cookie.value)),
-                video_id, fatal=False) or {}
+                display_id, fatal=False) or {}
            access_token = auth_storage.get('a') or auth_storage.get('access_token')
        if not access_token:
            access_token = self._download_json(
-                'https://%s.com/anonymous' % site, display_id, query={
+                'https://%s.com/anonymous' % site, display_id,
                'Downloading token JSON metadata', query={
                    'authRel': 'authorization',
-                    'client_id': try_get(
+                    'client_id': '3020a40c2356a645b4b4',
                        react_data, lambda x: x['application']['apiClientId'],
                        compat_str) or '3020a40c2356a645b4b4',
                    'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
                    'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
                })['access_token']
        try:
        headers = self.geo_verification_headers()
        headers['Authorization'] = 'Bearer ' + access_token
        try:
            video = self._download_json(
                self._API_BASE_URL + 'content/videos',
                display_id, 'Downloading content JSON metadata',
                headers=headers, query={
                    'slug': display_id,
                })[0]
            video_id = video['id']
            stream = self._download_json(
-                'https://api.discovery.com/v1/streaming/video/' + video_id,
+                self._API_BASE_URL + 'streaming/video/' + video_id,
-                display_id, headers=headers)
+                display_id, 'Downloading streaming JSON metadata', headers=headers)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                e_description = self._parse_json(
--- a/youtube_dl/extractor/yandexvideo.py
+++ b/youtube_dl/extractor/yandexvideo.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    int_or_none,
    url_or_none,
 )
@@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
        # episode, sports
        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
        'only_matching': True,
    }, {
        # DASH with DRM
        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
                'disable_trackings': 1,
            })['content']
-        m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
+        content_url = url_or_none(content.get('content_url')) or url_or_none(
            content['streams'][0]['url'])
        title = content.get('title') or content.get('computed_title')
        ext = determine_ext(content_url)
        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                content_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mpd':
            formats = self._extract_mpd_formats(
                content_url, video_id, mpd_id='dash')
        else:
            formats = [{'url': content_url}]
        self._sort_formats(formats)
        description = content.get('description')