diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index cd9ccbe96..d0e0a5637 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving the readability of complex expressions. However, you should avoid extracting a variable that is used only once and defining it far away from the place where it is used, since this makes the linear flow of the extractor difficult to follow.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression by passing a list of meta names to a single call.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+### Trailing parentheses
+
+Always place the closing parenthesis directly after the last argument, rather than on a separate line of its own.
+
+#### Example
+
+Correct:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+```
+
+Incorrect:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list,
+)
+```
+
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 9003545ce..c4b90cd90 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -5,14 +5,8 @@ import re
import string
from .discoverygo import DiscoveryGoBaseIE
-from ..compat import (
- compat_str,
- compat_urllib_parse_unquote,
-)
-from ..utils import (
- ExtractorError,
- try_get,
-)
+from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError
from ..compat import compat_HTTPError
@@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
cookingchanneltv|
motortrend
)
- )\.com(?P/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+))'''
+ )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P[^./?#]+)'''
_TESTS = [{
- 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
+ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
'info_dict': {
- 'id': '5a2d9b4d6b66d17a5026e1fd',
+ 'id': '5a2f35ce6b66d17a5026e29e',
'ext': 'mp4',
- 'title': 'Dave Foley',
- 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
- 'duration': 608,
+ 'title': 'Riding with Matthew Perry',
+ 'description': 'md5:a34333153e79bc4526019a5129e7f878',
+ 'duration': 84,
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE):
}]
_GEO_COUNTRIES = ['US']
_GEO_BYPASS = False
+ _API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url):
- site, path, display_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, display_id)
-
- react_data = self._parse_json(self._search_regex(
- r'window\.__reactTransmitPacket\s*=\s*({.+?});',
- webpage, 'react data'), display_id)
- content_blocks = react_data['layout'][path]['contentBlocks']
- video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
- video_id = video['id']
+ site, display_id = re.match(self._VALID_URL, url).groups()
access_token = None
cookies = self._get_cookies(url)
@@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE):
if auth_storage_cookie and auth_storage_cookie.value:
auth_storage = self._parse_json(compat_urllib_parse_unquote(
compat_urllib_parse_unquote(auth_storage_cookie.value)),
- video_id, fatal=False) or {}
+ display_id, fatal=False) or {}
access_token = auth_storage.get('a') or auth_storage.get('access_token')
if not access_token:
access_token = self._download_json(
- 'https://%s.com/anonymous' % site, display_id, query={
+ 'https://%s.com/anonymous' % site, display_id,
+ 'Downloading token JSON metadata', query={
'authRel': 'authorization',
- 'client_id': try_get(
- react_data, lambda x: x['application']['apiClientId'],
- compat_str) or '3020a40c2356a645b4b4',
+ 'client_id': '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
})['access_token']
- try:
- headers = self.geo_verification_headers()
- headers['Authorization'] = 'Bearer ' + access_token
+ headers = self.geo_verification_headers()
+ headers['Authorization'] = 'Bearer ' + access_token
+ try:
+ video = self._download_json(
+ self._API_BASE_URL + 'content/videos',
+ display_id, 'Downloading content JSON metadata',
+ headers=headers, query={
+ 'slug': display_id,
+ })[0]
+ video_id = video['id']
stream = self._download_json(
- 'https://api.discovery.com/v1/streaming/video/' + video_id,
- display_id, headers=headers)
+ self._API_BASE_URL + 'streaming/video/' + video_id,
+ display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json(
diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py
index 1aea95383..46529be05 100644
--- a/youtube_dl/extractor/yandexvideo.py
+++ b/youtube_dl/extractor/yandexvideo.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
int_or_none,
url_or_none,
)
@@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
# episode, sports
'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
'only_matching': True,
+ }, {
+ # DASH with DRM
+ 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
'disable_trackings': 1,
})['content']
- m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
+ content_url = url_or_none(content.get('content_url')) or url_or_none(
content['streams'][0]['url'])
title = content.get('title') or content.get('computed_title')
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
+ ext = determine_ext(content_url)
+
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ content_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ elif ext == 'mpd':
+ formats = self._extract_mpd_formats(
+ content_url, video_id, mpd_id='dash')
+ else:
+ formats = [{'url': content_url}]
+
self._sort_formats(formats)
description = content.get('description')