Merge pull request #55 from ytdl-org/master

[pull] master from ytdl-org:master
This commit is contained in:
pull[bot] 2019-08-01 22:19:02 +00:00 committed by GitHub
commit f660790905
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 37 deletions

View File

@ -366,3 +366,67 @@ duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
### Inline values
Extracting a variable is acceptable when it reduces code duplication or improves the readability of a complex expression. However, avoid extracting a variable that is used only once and defining it far away from the place where it is used in the extractor file, since this makes the linear flow of the code hard to follow.
#### Example
Correct:
```python
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
```
Incorrect:
```python
TITLE_RE = r'<title>([^<]+)</title>'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
```
### Collapse fallbacks
Multiple fallback values can quickly become unwieldy. Collapse them into a single expression by passing a list of meta names to one call instead of chaining several calls with `or`.
#### Example
Good:
```python
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
```
Unwieldy:
```python
description = (
self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, default=None)
or self._html_search_meta('twitter:description', webpage, default=None))
```
### Trailing parentheses
Always put the closing parenthesis on the same line as the last argument, directly after it, rather than on a separate line of its own.
#### Example
Correct:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list)
```
Incorrect:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list,
)
```

View File

@ -5,14 +5,8 @@ import re
import string import string
from .discoverygo import DiscoveryGoBaseIE from .discoverygo import DiscoveryGoBaseIE
from ..compat import ( from ..compat import compat_urllib_parse_unquote
compat_str, from ..utils import ExtractorError
compat_urllib_parse_unquote,
)
from ..utils import (
ExtractorError,
try_get,
)
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
cookingchanneltv| cookingchanneltv|
motortrend motortrend
) )
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))''' )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley', 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
'info_dict': { 'info_dict': {
'id': '5a2d9b4d6b66d17a5026e1fd', 'id': '5a2f35ce6b66d17a5026e29e',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dave Foley', 'title': 'Riding with Matthew Perry',
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f', 'description': 'md5:a34333153e79bc4526019a5129e7f878',
'duration': 608, 'duration': 84,
}, },
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
@ -62,17 +56,10 @@ class DiscoveryIE(DiscoveryGoBaseIE):
}] }]
_GEO_COUNTRIES = ['US'] _GEO_COUNTRIES = ['US']
_GEO_BYPASS = False _GEO_BYPASS = False
_API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url): def _real_extract(self, url):
site, path, display_id = re.match(self._VALID_URL, url).groups() site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
react_data = self._parse_json(self._search_regex(
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
webpage, 'react data'), display_id)
content_blocks = react_data['layout'][path]['contentBlocks']
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
video_id = video['id']
access_token = None access_token = None
cookies = self._get_cookies(url) cookies = self._get_cookies(url)
@ -82,27 +69,33 @@ class DiscoveryIE(DiscoveryGoBaseIE):
if auth_storage_cookie and auth_storage_cookie.value: if auth_storage_cookie and auth_storage_cookie.value:
auth_storage = self._parse_json(compat_urllib_parse_unquote( auth_storage = self._parse_json(compat_urllib_parse_unquote(
compat_urllib_parse_unquote(auth_storage_cookie.value)), compat_urllib_parse_unquote(auth_storage_cookie.value)),
video_id, fatal=False) or {} display_id, fatal=False) or {}
access_token = auth_storage.get('a') or auth_storage.get('access_token') access_token = auth_storage.get('a') or auth_storage.get('access_token')
if not access_token: if not access_token:
access_token = self._download_json( access_token = self._download_json(
'https://%s.com/anonymous' % site, display_id, query={ 'https://%s.com/anonymous' % site, display_id,
'Downloading token JSON metadata', query={
'authRel': 'authorization', 'authRel': 'authorization',
'client_id': try_get( 'client_id': '3020a40c2356a645b4b4',
react_data, lambda x: x['application']['apiClientId'],
compat_str) or '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site, 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
})['access_token'] })['access_token']
try:
headers = self.geo_verification_headers() headers = self.geo_verification_headers()
headers['Authorization'] = 'Bearer ' + access_token headers['Authorization'] = 'Bearer ' + access_token
try:
video = self._download_json(
self._API_BASE_URL + 'content/videos',
display_id, 'Downloading content JSON metadata',
headers=headers, query={
'slug': display_id,
})[0]
video_id = video['id']
stream = self._download_json( stream = self._download_json(
'https://api.discovery.com/v1/streaming/video/' + video_id, self._API_BASE_URL + 'streaming/video/' + video_id,
display_id, headers=headers) display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json( e_description = self._parse_json(

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
int_or_none, int_or_none,
url_or_none, url_or_none,
) )
@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
# episode, sports # episode, sports
'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d', 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
'only_matching': True, 'only_matching': True,
}, {
# DASH with DRM
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
'disable_trackings': 1, 'disable_trackings': 1,
})['content'] })['content']
m3u8_url = url_or_none(content.get('content_url')) or url_or_none( content_url = url_or_none(content.get('content_url')) or url_or_none(
content['streams'][0]['url']) content['streams'][0]['url'])
title = content.get('title') or content.get('computed_title') title = content.get('title') or content.get('computed_title')
ext = determine_ext(content_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', content_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls') m3u8_id='hls')
elif ext == 'mpd':
formats = self._extract_mpd_formats(
content_url, video_id, mpd_id='dash')
else:
formats = [{'url': content_url}]
self._sort_formats(formats) self._sort_formats(formats)
description = content.get('description') description = content.get('description')