Merge pull request #55 from ytdl-org/master

[pull] master from ytdl-org:master
This commit is contained in:
pull[bot] 2019-08-01 22:19:02 +00:00 committed by GitHub
commit f660790905
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 37 deletions

View File

@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
### Inline values
Extracting variables is acceptable for reducing code duplication and improving the readability of complex expressions. However, you should avoid extracting variables that are used only once and defining them far away from their point of use in the extractor file, because this breaks the linear flow of the code and makes it harder to read.
#### Example
Correct:
```python
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
```
Incorrect:
```python
TITLE_RE = r'<title>([^<]+)</title>'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
```
### Collapse fallbacks
Multiple fallback values can quickly become unwieldy. Collapse them into a single expression by passing a list of meta names to one call instead of chaining several separate calls with `or`.
#### Example
Good:
```python
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
```
Unwieldy:
```python
description = (
self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, default=None)
or self._html_search_meta('twitter:description', webpage, default=None))
```
### Trailing parentheses
Always place the closing parenthesis immediately after the last argument, on the same line, rather than on a separate line of its own.
#### Example
Correct:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list)
```
Incorrect:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list,
)
```

View File

@ -5,14 +5,8 @@ import re
import string
from .discoverygo import DiscoveryGoBaseIE
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
ExtractorError,
try_get,
)
from ..compat import compat_urllib_parse_unquote
from ..utils import ExtractorError
from ..compat import compat_HTTPError
@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
cookingchanneltv|
motortrend
)
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
)\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
_TESTS = [{
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
'info_dict': {
'id': '5a2d9b4d6b66d17a5026e1fd',
'id': '5a2f35ce6b66d17a5026e29e',
'ext': 'mp4',
'title': 'Dave Foley',
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
'duration': 608,
'title': 'Riding with Matthew Perry',
'description': 'md5:a34333153e79bc4526019a5129e7f878',
'duration': 84,
},
'params': {
'skip_download': True, # requires ffmpeg
@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE):
}]
_GEO_COUNTRIES = ['US']
_GEO_BYPASS = False
_API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url):
site, path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
react_data = self._parse_json(self._search_regex(
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
webpage, 'react data'), display_id)
content_blocks = react_data['layout'][path]['contentBlocks']
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
video_id = video['id']
site, display_id = re.match(self._VALID_URL, url).groups()
access_token = None
cookies = self._get_cookies(url)
@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE):
if auth_storage_cookie and auth_storage_cookie.value:
auth_storage = self._parse_json(compat_urllib_parse_unquote(
compat_urllib_parse_unquote(auth_storage_cookie.value)),
video_id, fatal=False) or {}
display_id, fatal=False) or {}
access_token = auth_storage.get('a') or auth_storage.get('access_token')
if not access_token:
access_token = self._download_json(
'https://%s.com/anonymous' % site, display_id, query={
'https://%s.com/anonymous' % site, display_id,
'Downloading token JSON metadata', query={
'authRel': 'authorization',
'client_id': try_get(
react_data, lambda x: x['application']['apiClientId'],
compat_str) or '3020a40c2356a645b4b4',
'client_id': '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
})['access_token']
try:
headers = self.geo_verification_headers()
headers['Authorization'] = 'Bearer ' + access_token
try:
video = self._download_json(
self._API_BASE_URL + 'content/videos',
display_id, 'Downloading content JSON metadata',
headers=headers, query={
'slug': display_id,
})[0]
video_id = video['id']
stream = self._download_json(
'https://api.discovery.com/v1/streaming/video/' + video_id,
display_id, headers=headers)
self._API_BASE_URL + 'streaming/video/' + video_id,
display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json(

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
url_or_none,
)
@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
# episode, sports
'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
'only_matching': True,
}, {
# DASH with DRM
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
'only_matching': True,
}]
def _real_extract(self, url):
@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
'disable_trackings': 1,
})['content']
m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
content_url = url_or_none(content.get('content_url')) or url_or_none(
content['streams'][0]['url'])
title = content.get('title') or content.get('computed_title')
ext = determine_ext(content_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
content_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
elif ext == 'mpd':
formats = self._extract_mpd_formats(
content_url, video_id, mpd_id='dash')
else:
formats = [{'url': content_url}]
self._sort_formats(formats)
description = content.get('description')