Merge remote-tracking branch 'upstream/master' into myversion

commit 4ce6a125fc
Author: Andrew Udvare
Date:   2018-05-13 03:19:01 -04:00
GPG Key ID: 1AFD9AFC120C26DD (no known key found for this signature in database)

12 changed files with 225 additions and 147 deletions

.github/ISSUE_TEMPLATE.md

@@ -6,8 +6,8 @@
 ---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.01**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.09**
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.05.01
+[debug] youtube-dl version 2018.05.09
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}

ChangeLog

@@ -1,3 +1,21 @@
+version 2018.05.09
+
+Core
+* [YoutubeDL] Ensure ext exists for automatic captions
+* Introduce --geo-bypass-ip-block
+
+Extractors
++ [udemy] Extract asset captions
++ [udemy] Extract stream URLs (#16372)
++ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
++ [cloudflarestream] Add support for cloudflarestream.com (#16375)
+* [watchbox] Fix extraction (#16356)
+* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
++ [itv:btcc] Add support for itv.com/btcc (#16139)
+* [tunein] Use live title for live streams (#16347)
+* [itv] Improve extraction (#16253)
+
+
 version 2018.05.01
 
 Core

README.md

@@ -116,6 +116,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --geo-bypass-country CODE        Force bypass geographic restriction with
                                      explicitly provided two-letter ISO 3166-2
                                      country code (experimental)
+    --geo-bypass-ip-block IP_BLOCK   Force bypass geographic restriction with
+                                     explicitly provided IP block in CIDR
+                                     notation (experimental)

 ## Video Selection:
     --playlist-start NUMBER          Playlist video to start at (default is 1)
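
Note: the new option can also be driven through the embedding API. A minimal sketch, assuming the CLI flag maps to a 'geo_bypass_ip_block' params key (mirroring how --geo-bypass-country maps to 'geo_bypass_country'); the CIDR block below is a documentation placeholder, not a working bypass range:

    import youtube_dl

    # Equivalent of: youtube-dl --geo-bypass-ip-block 203.0.113.0/24 <URL>
    # 'geo_bypass_ip_block' is an assumed params key; 203.0.113.0/24 is a
    # documentation-only address block.
    ydl_opts = {'geo_bypass_ip_block': '203.0.113.0/24'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])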

docs/supportedsites.md

@@ -122,6 +122,7 @@
 - **BRMediathek**: Bayerischer Rundfunk Mediathek
 - **bt:article**: Bergens Tidende Articles
 - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
+- **BusinessInsider**
 - **BuzzFeed**
 - **BYUtv**
 - **Camdemy**
@@ -163,6 +164,7 @@
 - **ClipRs**
 - **Clipsyndicate**
 - **CloserToTruth**
+- **CloudflareStream**
 - **cloudtime**: CloudTime
 - **Cloudy**
 - **Clubic**
@@ -373,6 +375,7 @@
 - **Ir90Tv**
 - **ITTF**
 - **ITV**
+- **ITVBTCC**
 - **ivi**: ivi.ru
 - **ivi:compilation**: ivi.ru compilations
 - **ivideon**: Ivideon TV

youtube_dl/YoutubeDL.py

@@ -1482,23 +1482,28 @@ class YoutubeDL(object):
             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

-        subtitles = info_dict.get('subtitles')
-        if subtitles:
-            for _, subtitle in subtitles.items():
-                for subtitle_format in subtitle:
-                    if subtitle_format.get('url'):
-                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
-                    if subtitle_format.get('ext') is None:
-                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+        for cc_kind in ('subtitles', 'automatic_captions'):
+            cc = info_dict.get(cc_kind)
+            if cc:
+                for _, subtitle in cc.items():
+                    for subtitle_format in subtitle:
+                        if subtitle_format.get('url'):
+                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+                        if subtitle_format.get('ext') is None:
+                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+        automatic_captions = info_dict.get('automatic_captions')
+        subtitles = info_dict.get('subtitles')

         if self.params.get('listsubtitles', False):
             if 'automatic_captions' in info_dict:
-                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+                self.list_subtitles(
+                    info_dict['id'], automatic_captions, 'automatic captions')
             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
             return
         info_dict['requested_subtitles'] = self.process_subtitles(
-            info_dict['id'], subtitles,
-            info_dict.get('automatic_captions'))
+            info_dict['id'], subtitles, automatic_captions)

         # We now pick which formats have to be downloaded
         if info_dict.get('formats') is None:
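
Note: the point of the rework is that automatic captions now get the same URL sanitization and ext fallback as regular subtitles. A standalone sketch with a hypothetical caption track:

    from youtube_dl.utils import determine_ext, sanitize_url

    # Hypothetical extractor output: an automatic caption track with no 'ext'.
    automatic_captions = {'en': [{'url': 'https://example.com/captions/en.vtt'}]}

    for _, tracks in automatic_captions.items():
        for track in tracks:
            if track.get('url'):
                track['url'] = sanitize_url(track['url'])
            if track.get('ext') is None:
                # The fix: derive ext from the URL so downstream code
                # (e.g. filename templates) never sees a missing ext.
                track['ext'] = determine_ext(track['url']).lower()

    print(automatic_captions['en'][0]['ext'])  # vtt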

youtube_dl/extractor/mixcloud.py

@@ -179,6 +179,10 @@ class MixcloudIE(InfoExtractor):
                 formats.append({
                     'format_id': 'http',
                     'url': decrypted,
+                    'downloader_options': {
+                        # Mixcloud starts throttling at >~5M
+                        'http_chunk_size': 5242880,
+                    },
                 })

         self._sort_formats(formats)
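
Note: the 'downloader_options' hint asks the HTTP downloader to fetch the file in ranges of at most 5 MiB so no single request trips Mixcloud's throttling. Roughly the idea, as a standalone Python 3 sketch (the URL is a placeholder):

    import urllib.error
    import urllib.request

    CHUNK_SIZE = 5242880  # 5 MiB; Mixcloud starts throttling at >~5M

    def fetch_in_chunks(url, dest):
        # Issue successive Range requests so each transfer stays under CHUNK_SIZE.
        offset = 0
        with open(dest, 'wb') as out:
            while True:
                req = urllib.request.Request(url, headers={
                    'Range': 'bytes=%d-%d' % (offset, offset + CHUNK_SIZE - 1)})
                try:
                    with urllib.request.urlopen(req) as resp:
                        data = resp.read()
                except urllib.error.HTTPError as e:
                    if e.code == 416:  # requested range starts past end of file
                        break
                    raise
                out.write(data)
                offset += len(data)
                if len(data) < CHUNK_SIZE:  # short read: that was the last chunk
                    break

    # fetch_in_chunks('https://example.com/audio.mp3', 'audio.mp3')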

youtube_dl/extractor/nick.py

@@ -85,7 +85,7 @@ class NickBrIE(MTVServicesInfoExtractor):
                     https?://
                     (?:
                         (?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
-                        (?:www\.)?nickjr\.nl
+                        (?:www\.)?nickjr\.[a-z]{2}
                     )
                     /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
                 '''
@@ -98,6 +98,9 @@ class NickBrIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):

youtube_dl/extractor/reddit.py

@@ -47,7 +47,7 @@ class RedditIE(InfoExtractor):

 class RedditRIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:(?:www|old)\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -86,6 +86,10 @@ class RedditRIE(InfoExtractor):
     }, {
         # youtube
         'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
         'only_matching': True,
+    }, {
+        # reddit video @ nm reddit
+        'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
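
Note: the loosened pattern accepts any reddit.com subdomain instead of just www and old. A quick self-contained check of the new expression against the two test URLs:

    import re

    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'

    for url in (
        'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
        'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
    ):
        m = re.match(_VALID_URL, url)
        print(m.group('id'))  # 6rrwyj, then 8idvby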

youtube_dl/extractor/teamcoco.py

@@ -1,35 +1,34 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import binascii
-import re
 import json

 from .common import InfoExtractor
-from ..compat import (
-    compat_b64decode,
-    compat_ord,
-)
 from ..utils import (
-    ExtractorError,
-    qualities,
     determine_ext,
+    ExtractorError,
+    int_or_none,
+    mimetype2ext,
+    parse_duration,
+    parse_iso8601,
+    qualities,
 )


 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+    _VALID_URL = r'https?://teamcoco\.com/video/(?P<id>[^/?#]+)'
     _TESTS = [
         {
-            'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
-            'md5': '3f7746aa0dc86de18df7539903d399ea',
+            'url': 'http://teamcoco.com/video/mary-kay-remote',
+            'md5': '55d532f81992f5c92046ad02fec34d7d',
             'info_dict': {
                 'id': '80187',
                 'ext': 'mp4',
                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
-                'duration': 504,
-                'age_limit': 0,
+                'duration': 495.0,
+                'upload_date': '20140402',
+                'timestamp': 1396407600,
             }
         }, {
             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
@@ -40,7 +39,8 @@ class TeamcocoIE(InfoExtractor):
             'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
             'title': 'Louis C.K. Interview Pt. 1 11/3/11',
             'duration': 288,
-            'age_limit': 0,
+            'upload_date': '20111104',
+            'timestamp': 1320405840,
         }
     }, {
         'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
@@ -49,6 +49,8 @@
             'ext': 'mp4',
             'title': 'Timothy Olyphant Raises A Toast To “Justified”',
             'description': 'md5:15501f23f020e793aeca761205e42c24',
+            'upload_date': '20150415',
+            'timestamp': 1429088400,
         },
         'params': {
             'skip_download': True,  # m3u8 downloads
@@ -63,110 +65,93 @@
             },
             'params': {
                 'skip_download': True,  # m3u8 downloads
-            }
+            },
+            'skip': 'This video is no longer available.',
         }
     ]

-    _VIDEO_ID_REGEXES = (
-        r'"eVar42"\s*:\s*(\d+)',
-        r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
-        r'"id_not"\s*:\s*(\d+)'
-    )
+    def _graphql_call(self, query_template, object_type, object_id):
+        find_object = 'find' + object_type
+        return self._download_json(
+            'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+                'query': query_template % (find_object, object_id)
+            }))['data'][find_object]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        webpage, urlh = self._download_webpage_handle(url, display_id)
-        if 'src=expired' in urlh.geturl():
-            raise ExtractorError('This video is expired.', expected=True)
-
-        video_id = mobj.group('video_id')
-        if not video_id:
-            video_id = self._html_search_regex(
-                self._VIDEO_ID_REGEXES, webpage, 'video id')
-
-        data = None
-
-        preload_codes = self._html_search_regex(
-            r'(function.+)setTimeout\(function\(\)\{playlist',
-            webpage, 'preload codes')
-        base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
-        base64_fragments.remove('init')
-
-        def _check_sequence(cur_fragments):
-            if not cur_fragments:
-                return
-            for i in range(len(cur_fragments)):
-                cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
-                try:
-                    raw_data = compat_b64decode(cur_sequence)
-                    if compat_ord(raw_data[0]) == compat_ord('{'):
-                        return json.loads(raw_data.decode('utf-8'))
-                except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
-                    continue
-
-        def _check_data():
-            for i in range(len(base64_fragments) + 1):
-                for j in range(i, len(base64_fragments) + 1):
-                    data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
-                    if data:
-                        return data
-
-        self.to_screen('Try to compute possible data sequence. This may take some time.')
-        data = _check_data()
-
-        if not data:
-            raise ExtractorError(
-                'Preload information could not be extracted', expected=True)
+        display_id = self._match_id(url)
+
+        response = self._graphql_call('''{
+  %s(slug: "video/%s") {
+    ... on RecordSlug {
+      record {
+        id
+        title
+        teaser
+        publishOn
+        thumb {
+          preview
+        }
+        tags {
+          name
+        }
+        duration
+      }
+    }
+    ... on NotFoundSlug {
+      status
+    }
+  }
+}''', 'Slug', display_id)
+        if response.get('status'):
+            raise ExtractorError('This video is no longer available.', expected=True)
+
+        record = response['record']
+        video_id = record['id']
+
+        srcs = self._graphql_call('''{
+  %s(id: "%s") {
+    src
+  }
+}''', 'RecordVideoSource', video_id)['src']

         formats = []
-        get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
-        for filed in data['files']:
-            if determine_ext(filed['url']) == 'm3u8':
-                # compat_urllib_parse.urljoin does not work here
-                if filed['url'].startswith('/'):
-                    m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
-                else:
-                    m3u8_url = filed['url']
-                m3u8_formats = self._extract_m3u8_formats(
-                    m3u8_url, video_id, ext='mp4')
-                for m3u8_format in m3u8_formats:
-                    if m3u8_format not in formats:
-                        formats.append(m3u8_format)
-            elif determine_ext(filed['url']) == 'f4m':
-                # TODO Correct f4m extraction
+        get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+        for format_id, src in srcs.items():
+            if not isinstance(src, dict):
                 continue
+            src_url = src.get('src')
+            if not src_url:
+                continue
+            ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+            if format_id == 'hls' or ext == 'm3u8':
+                # compat_urllib_parse.urljoin does not work here
+                if src_url.startswith('/'):
+                    src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+                formats.extend(self._extract_m3u8_formats(
+                    src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
             else:
-                if filed['url'].startswith('/mp4:protected/'):
+                if src_url.startswith('/mp4:protected/'):
                     # TODO Correct extraction for these files
                     continue
-                m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
-                if m_format is not None:
-                    format_id = m_format.group(1)
-                else:
-                    format_id = filed['bitrate']
-                tbr = (
-                    int(filed['bitrate'])
-                    if filed['bitrate'].isdigit()
-                    else None)
+                tbr = int_or_none(self._search_regex(
+                    r'(\d+)k\.mp4', src_url, 'tbr', default=None))

                 formats.append({
-                    'url': filed['url'],
-                    'ext': 'mp4',
+                    'url': src_url,
+                    'ext': ext,
                     'tbr': tbr,
                     'format_id': format_id,
                     'quality': get_quality(format_id),
                 })
-
         self._sort_formats(formats)

         return {
             'id': video_id,
             'display_id': display_id,
             'formats': formats,
-            'title': data['title'],
-            'thumbnail': data.get('thumb', {}).get('href'),
-            'description': data.get('teaser'),
-            'duration': data.get('duration'),
-            'age_limit': self._family_friendly_search(webpage),
+            'title': record['title'],
+            'thumbnail': record.get('thumb', {}).get('preview'),
+            'description': record.get('teaser'),
+            'duration': parse_duration(record.get('duration')),
+            'timestamp': parse_iso8601(record.get('publishOn')),
         }
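
Note: the rewrite replaces the base64-fragment brute force with two GraphQL calls. A minimal standalone sketch of the slug lookup, assuming the endpoint still answers plain unauthenticated POSTs as it did at merge time (the slug is the test URL's):

    import json
    import urllib.request

    query = '''{
      findSlug(slug: "video/mary-kay-remote") {
        ... on RecordSlug {
          record { id title duration publishOn }
        }
        ... on NotFoundSlug { status }
      }
    }'''
    req = urllib.request.Request(
        'http://teamcoco.com/graphql/',
        data=json.dumps({'query': query}).encode('utf-8'))
    with urllib.request.urlopen(req) as resp:
        print(json.load(resp)['data']['findSlug'])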

youtube_dl/extractor/twitch.py

@@ -8,6 +8,7 @@ import random
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_kwargs,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse_urlencode,
@@ -16,11 +17,14 @@ from ..compat import (
 from ..utils import (
     clean_html,
     ExtractorError,
+    float_or_none,
     int_or_none,
-    js_to_json,
     orderedSet,
     parse_duration,
     parse_iso8601,
+    qualities,
+    try_get,
+    unified_timestamp,
     update_url_query,
     urlencode_postdata,
     urljoin,
@@ -45,10 +49,11 @@ class TwitchBaseIE(InfoExtractor):
                 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
                 expected=True)

-    def _call_api(self, path, item_id, note):
+    def _call_api(self, path, item_id, *args, **kwargs):
+        kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
         response = self._download_json(
-            '%s/%s' % (self._API_BASE, path), item_id, note,
-            headers={'Client-ID': self._CLIENT_ID})
+            '%s/%s' % (self._API_BASE, path), item_id,
+            *args, **compat_kwargs(kwargs))
         self._handle_error(response)
         return response
@@ -622,21 +627,23 @@
     }


-class TwitchClipsIE(InfoExtractor):
+class TwitchClipsIE(TwitchBaseIE):
     IE_NAME = 'twitch:clips'
     _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'

     _TESTS = [{
-        'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
+        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
         'md5': '761769e1eafce0ffebfb4089cb3847cd',
         'info_dict': {
-            'id': 'AggressiveCobraPoooound',
+            'id': '42850523',
             'ext': 'mp4',
             'title': 'EA Play 2016 Live from the Novo Theatre',
             'thumbnail': r're:^https?://.*\.jpg',
+            'timestamp': 1465767393,
+            'upload_date': '20160612',
             'creator': 'EA',
             'uploader': 'stereotype_',
-            'uploader_id': 'stereotype_',
+            'uploader_id': '43566419',
         },
     }, {
         # multiple formats
@@ -647,34 +654,63 @@ class TwitchClipsIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)

-        webpage = self._download_webpage(url, video_id)
+        status = self._download_json(
+            'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
+            video_id)

-        clip = self._parse_json(
-            self._search_regex(
-                r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
-            video_id, transform_source=js_to_json)
+        formats = []

-        title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
-
-        formats = [{
-            'url': option['source'],
-            'format_id': option.get('quality'),
-            'height': int_or_none(option.get('quality')),
-        } for option in clip.get('quality_options', []) if option.get('source')]
-
-        if not formats:
-            formats = [{
-                'url': clip['clip_video_url'],
-            }]
+        for option in status['quality_options']:
+            if not isinstance(option, dict):
+                continue
+            source = option.get('source')
+            if not source or not isinstance(source, compat_str):
+                continue
+            formats.append({
+                'url': source,
+                'format_id': option.get('quality'),
+                'height': int_or_none(option.get('quality')),
+                'fps': int_or_none(option.get('frame_rate')),
+            })

         self._sort_formats(formats)

-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
-            'uploader': clip.get('curator_login'),
-            'uploader_id': clip.get('curator_display_name'),
+        info = {
             'formats': formats,
         }
+
+        clip = self._call_api(
+            'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
+                'Accept': 'application/vnd.twitchtv.v5+json',
+            })
+
+        if clip:
+            quality_key = qualities(('tiny', 'small', 'medium'))
+            thumbnails = []
+            thumbnails_dict = clip.get('thumbnails')
+            if isinstance(thumbnails_dict, dict):
+                for thumbnail_id, thumbnail_url in thumbnails_dict.items():
+                    thumbnails.append({
+                        'id': thumbnail_id,
+                        'url': thumbnail_url,
+                        'preference': quality_key(thumbnail_id),
+                    })
+
+            info.update({
+                'id': clip.get('tracking_id') or video_id,
+                'title': clip.get('title') or video_id,
+                'duration': float_or_none(clip.get('duration')),
+                'views': int_or_none(clip.get('views')),
+                'timestamp': unified_timestamp(clip.get('created_at')),
+                'thumbnails': thumbnails,
+                'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
+                'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
+                'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
+            })
+        else:
+            info.update({
+                'title': video_id,
+                'id': video_id,
+            })
+
+        return info
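
Note: formats now come from the public clip status endpoint, while metadata comes from the Kraken v5 API via _call_api (hence the switch to TwitchBaseIE, which injects the Client-ID header). A standalone sketch of the status call, assuming the endpoint still behaves as it did at merge time:

    import json
    import urllib.request

    slug = 'FaintLightGullWholeWheat'  # clip slug from the test URL above
    url = 'https://clips.twitch.tv/api/v2/clips/%s/status' % slug
    with urllib.request.urlopen(url) as resp:
        status = json.load(resp)

    for option in status.get('quality_options', []):
        # Each entry pairs a direct MP4 source with quality/frame-rate hints.
        print(option.get('quality'), option.get('frame_rate'), option.get('source'))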

youtube_dl/extractor/udemy.py

@@ -18,6 +18,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     sanitized_Request,
+    try_get,
     unescapeHTML,
     urlencode_postdata,
 )
@@ -105,7 +106,7 @@ class UdemyIE(InfoExtractor):
             % (course_id, lecture_id),
             lecture_id, 'Downloading lecture JSON', query={
                 'fields[lecture]': 'title,description,view_html,asset',
-                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
+                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
             })

     def _handle_error(self, response):
@@ -303,9 +304,25 @@
                 'url': src,
             })

-        download_urls = asset.get('download_urls')
-        if isinstance(download_urls, dict):
-            extract_formats(download_urls.get('Video'))
+        for url_kind in ('download', 'stream'):
+            urls = asset.get('%s_urls' % url_kind)
+            if isinstance(urls, dict):
+                extract_formats(urls.get('Video'))
+
+        captions = asset.get('captions')
+        if isinstance(captions, list):
+            for cc in captions:
+                if not isinstance(cc, dict):
+                    continue
+                cc_url = cc.get('url')
+                if not cc_url or not isinstance(cc_url, compat_str):
+                    continue
+                lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
+                sub_dict = (automatic_captions if cc.get('source') == 'auto'
+                            else subtitles)
+                sub_dict.setdefault(lang or 'en', []).append({
+                    'url': cc_url,
+                })

         view_html = lecture.get('view_html')
         if view_html:
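
Note: the new caption handling sends machine-generated tracks to automatic_captions and everything else to subtitles. A standalone sketch over a hypothetical asset payload (URLs and locales are made up; only the field names come from the diff):

    # Hypothetical 'captions' list shaped like the fields the extractor reads.
    captions = [
        {'url': 'https://example.com/cc/en.vtt', 'source': 'manual',
         'locale': {'locale': 'en'}},
        {'url': 'https://example.com/cc/es.vtt', 'source': 'auto',
         'locale': {'locale': 'es'}},
    ]

    subtitles, automatic_captions = {}, {}
    for cc in captions:
        cc_url = cc.get('url')
        if not cc_url:
            continue
        lang = (cc.get('locale') or {}).get('locale')
        # Machine-generated tracks go to automatic_captions, the rest to subtitles.
        sub_dict = automatic_captions if cc.get('source') == 'auto' else subtitles
        sub_dict.setdefault(lang or 'en', []).append({'url': cc_url})

    print(sorted(subtitles), sorted(automatic_captions))  # ['en'] ['es']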

youtube_dl/version.py

@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.05.01'
+__version__ = '2018.05.09'