Merge pull request #55 from ytdl-org/master

[pull] master from ytdl-org:master
2019-08-01 22:19:02 +00:00 · 2019-08-01 22:19:02 +00:00 · f660790905
commit f660790905
parent 44ec33a626 33b529fabd
3 changed files with 108 additions and 37 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000)
 view_count = int_or_none(video.get('views'))
 ```

+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving the readability of complex expressions. However, you should avoid extracting variables that are used only once and moving them to distant parts of the extractor file, as this makes the linear flow of the code harder to follow.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression by passing a list of meta names to a single `_html_search_meta` call.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+    ['og:description', 'description', 'twitter:description'],
+    webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+    self._og_search_description(webpage, default=None)
+    or self._html_search_meta('description', webpage, default=None)
+    or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+### Trailing parentheses
+
+Always place the closing parenthesis directly after the last argument, on the same line, rather than on a line of its own after a trailing comma.
+
+#### Example
+
+Correct:
+
+```python
+    lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+    list)
+```
+
+Incorrect:
+
+```python
+    lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+    list,
+)
+```
+
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -5,14 +5,8 @@ import re
 import string

 from .discoverygo import DiscoveryGoBaseIE
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-)
-from ..utils import (
-    ExtractorError,
-    try_get,
-)
+from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError
 from ..compat import compat_HTTPError


@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
                    cookingchanneltv|
                    motortrend
                )
-        )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
+        )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
    _TESTS = [{
-        'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
+        'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
        'info_dict': {
-            'id': '5a2d9b4d6b66d17a5026e1fd',
+            'id': '5a2f35ce6b66d17a5026e29e',
            'ext': 'mp4',
-            'title': 'Dave Foley',
-            'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
-            'duration': 608,
+            'title': 'Riding with Matthew Perry',
+            'description': 'md5:a34333153e79bc4526019a5129e7f878',
+            'duration': 84,
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE):
    }]
    _GEO_COUNTRIES = ['US']
    _GEO_BYPASS = False
+    _API_BASE_URL = 'https://api.discovery.com/v1/'

    def _real_extract(self, url):
-        site, path, display_id = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(url, display_id)
-
-        react_data = self._parse_json(self._search_regex(
-            r'window\.__reactTransmitPacket\s*=\s*({.+?});',
-            webpage, 'react data'), display_id)
-        content_blocks = react_data['layout'][path]['contentBlocks']
-        video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
-        video_id = video['id']
+        site, display_id = re.match(self._VALID_URL, url).groups()

        access_token = None
        cookies = self._get_cookies(url)
@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE):
        if auth_storage_cookie and auth_storage_cookie.value:
            auth_storage = self._parse_json(compat_urllib_parse_unquote(
                compat_urllib_parse_unquote(auth_storage_cookie.value)),
-                video_id, fatal=False) or {}
+                display_id, fatal=False) or {}
            access_token = auth_storage.get('a') or auth_storage.get('access_token')

        if not access_token:
            access_token = self._download_json(
-                'https://%s.com/anonymous' % site, display_id, query={
+                'https://%s.com/anonymous' % site, display_id,
+                'Downloading token JSON metadata', query={
                    'authRel': 'authorization',
-                    'client_id': try_get(
-                        react_data, lambda x: x['application']['apiClientId'],
-                        compat_str) or '3020a40c2356a645b4b4',
+                    'client_id': '3020a40c2356a645b4b4',
                    'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
                    'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
                })['access_token']

-        try:
-            headers = self.geo_verification_headers()
-            headers['Authorization'] = 'Bearer ' + access_token
+        headers = self.geo_verification_headers()
+        headers['Authorization'] = 'Bearer ' + access_token

+        try:
+            video = self._download_json(
+                self._API_BASE_URL + 'content/videos',
+                display_id, 'Downloading content JSON metadata',
+                headers=headers, query={
+                    'slug': display_id,
+                })[0]
+            video_id = video['id']
            stream = self._download_json(
-                'https://api.discovery.com/v1/streaming/video/' + video_id,
-                display_id, headers=headers)
+                self._API_BASE_URL + 'streaming/video/' + video_id,
+                display_id, 'Downloading streaming JSON metadata', headers=headers)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                e_description = self._parse_json(
--- a/youtube_dl/extractor/yandexvideo.py
+++ b/youtube_dl/extractor/yandexvideo.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
    int_or_none,
    url_or_none,
 )
@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
        # episode, sports
        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
        'only_matching': True,
+    }, {
+        # DASH with DRM
+        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
                'disable_trackings': 1,
            })['content']

-        m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
+        content_url = url_or_none(content.get('content_url')) or url_or_none(
            content['streams'][0]['url'])
        title = content.get('title') or content.get('computed_title')

-        formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
+        ext = determine_ext(content_url)
+
+        if ext == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                content_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+        elif ext == 'mpd':
+            formats = self._extract_mpd_formats(
+                content_url, video_id, mpd_id='dash')
+        else:
+            formats = [{'url': content_url}]
+
        self._sort_formats(formats)

        description = content.get('description')