Merge remote-tracking branch 'upstream/master' into myversion
This commit is contained in:
commit
4ce6a125fc
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.01**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.09**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.05.01
|
[debug] youtube-dl version 2018.05.09
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
|||||||
|
version 2018.05.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Ensure ext exists for automatic captions
|
||||||
|
* Introduce --geo-bypass-ip-block
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [udemy] Extract asset captions
|
||||||
|
+ [udemy] Extract stream URLs (#16372)
|
||||||
|
+ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
|
||||||
|
+ [cloudflarestream] Add support for cloudflarestream.com (#16375)
|
||||||
|
* [watchbox] Fix extraction (#16356)
|
||||||
|
* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
|
||||||
|
+ [itv:btcc] Add support for itv.com/btcc (#16139)
|
||||||
|
* [tunein] Use live title for live streams (#16347)
|
||||||
|
* [itv] Improve extraction (#16253)
|
||||||
|
|
||||||
|
|
||||||
version 2018.05.01
|
version 2018.05.01
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -116,6 +116,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||||
explicitly provided two-letter ISO 3166-1 alpha-2
|
explicitly provided two-letter ISO 3166-1 alpha-2
|
||||||
country code (experimental)
|
country code (experimental)
|
||||||
|
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
|
||||||
|
explicitly provided IP block in CIDR
|
||||||
|
notation (experimental)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||||
|
@ -122,6 +122,7 @@
|
|||||||
- **BRMediathek**: Bayerischer Rundfunk Mediathek
|
- **BRMediathek**: Bayerischer Rundfunk Mediathek
|
||||||
- **bt:article**: Bergens Tidende Articles
|
- **bt:article**: Bergens Tidende Articles
|
||||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||||
|
- **BusinessInsider**
|
||||||
- **BuzzFeed**
|
- **BuzzFeed**
|
||||||
- **BYUtv**
|
- **BYUtv**
|
||||||
- **Camdemy**
|
- **Camdemy**
|
||||||
@ -163,6 +164,7 @@
|
|||||||
- **ClipRs**
|
- **ClipRs**
|
||||||
- **Clipsyndicate**
|
- **Clipsyndicate**
|
||||||
- **CloserToTruth**
|
- **CloserToTruth**
|
||||||
|
- **CloudflareStream**
|
||||||
- **cloudtime**: CloudTime
|
- **cloudtime**: CloudTime
|
||||||
- **Cloudy**
|
- **Cloudy**
|
||||||
- **Clubic**
|
- **Clubic**
|
||||||
@ -373,6 +375,7 @@
|
|||||||
- **Ir90Tv**
|
- **Ir90Tv**
|
||||||
- **ITTF**
|
- **ITTF**
|
||||||
- **ITV**
|
- **ITV**
|
||||||
|
- **ITVBTCC**
|
||||||
- **ivi**: ivi.ru
|
- **ivi**: ivi.ru
|
||||||
- **ivi:compilation**: ivi.ru compilations
|
- **ivi:compilation**: ivi.ru compilations
|
||||||
- **ivideon**: Ivideon TV
|
- **ivideon**: Ivideon TV
|
||||||
|
@ -1482,23 +1482,28 @@ class YoutubeDL(object):
|
|||||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||||
|
|
||||||
subtitles = info_dict.get('subtitles')
|
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||||
if subtitles:
|
cc = info_dict.get(cc_kind)
|
||||||
for _, subtitle in subtitles.items():
|
if cc:
|
||||||
|
for _, subtitle in cc.items():
|
||||||
for subtitle_format in subtitle:
|
for subtitle_format in subtitle:
|
||||||
if subtitle_format.get('url'):
|
if subtitle_format.get('url'):
|
||||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||||
if subtitle_format.get('ext') is None:
|
if subtitle_format.get('ext') is None:
|
||||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||||
|
|
||||||
|
automatic_captions = info_dict.get('automatic_captions')
|
||||||
|
subtitles = info_dict.get('subtitles')
|
||||||
|
|
||||||
if self.params.get('listsubtitles', False):
|
if self.params.get('listsubtitles', False):
|
||||||
if 'automatic_captions' in info_dict:
|
if 'automatic_captions' in info_dict:
|
||||||
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
|
self.list_subtitles(
|
||||||
|
info_dict['id'], automatic_captions, 'automatic captions')
|
||||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||||
return
|
return
|
||||||
|
|
||||||
info_dict['requested_subtitles'] = self.process_subtitles(
|
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||||
info_dict['id'], subtitles,
|
info_dict['id'], subtitles, automatic_captions)
|
||||||
info_dict.get('automatic_captions'))
|
|
||||||
|
|
||||||
# We now pick which formats have to be downloaded
|
# We now pick which formats have to be downloaded
|
||||||
if info_dict.get('formats') is None:
|
if info_dict.get('formats') is None:
|
||||||
|
@ -179,6 +179,10 @@ class MixcloudIE(InfoExtractor):
|
|||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'http',
|
'format_id': 'http',
|
||||||
'url': decrypted,
|
'url': decrypted,
|
||||||
|
'downloader_options': {
|
||||||
|
# Mixcloud starts throttling at >~5M
|
||||||
|
'http_chunk_size': 5242880,
|
||||||
|
},
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -85,7 +85,7 @@ class NickBrIE(MTVServicesInfoExtractor):
|
|||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
|
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
|
||||||
(?:www\.)?nickjr\.nl
|
(?:www\.)?nickjr\.[a-z]{2}
|
||||||
)
|
)
|
||||||
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
|
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
|
||||||
'''
|
'''
|
||||||
@ -98,6 +98,9 @@ class NickBrIE(MTVServicesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
|
'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -47,7 +47,7 @@ class RedditIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class RedditRIE(InfoExtractor):
|
class RedditRIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:(?:www|old)\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -86,6 +86,10 @@ class RedditRIE(InfoExtractor):
|
|||||||
# youtube
|
# youtube
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
|
'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# reddit video @ nm reddit
|
||||||
|
'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,35 +1,34 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import binascii
|
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_b64decode,
|
|
||||||
compat_ord,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
qualities,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
qualities,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TeamcocoIE(InfoExtractor):
|
class TeamcocoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
|
_VALID_URL = r'https?://teamcoco\.com/video/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
'url': 'http://teamcoco.com/video/mary-kay-remote',
|
||||||
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
'md5': '55d532f81992f5c92046ad02fec34d7d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '80187',
|
'id': '80187',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
|
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
|
||||||
'duration': 504,
|
'duration': 495.0,
|
||||||
'age_limit': 0,
|
'upload_date': '20140402',
|
||||||
|
'timestamp': 1396407600,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||||
@ -40,7 +39,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
|
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
|
||||||
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
|
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
|
||||||
'duration': 288,
|
'duration': 288,
|
||||||
'age_limit': 0,
|
'upload_date': '20111104',
|
||||||
|
'timestamp': 1320405840,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
|
'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
|
||||||
@ -49,6 +49,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Timothy Olyphant Raises A Toast To “Justified”',
|
'title': 'Timothy Olyphant Raises A Toast To “Justified”',
|
||||||
'description': 'md5:15501f23f020e793aeca761205e42c24',
|
'description': 'md5:15501f23f020e793aeca761205e42c24',
|
||||||
|
'upload_date': '20150415',
|
||||||
|
'timestamp': 1429088400,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 downloads
|
'skip_download': True, # m3u8 downloads
|
||||||
@ -63,110 +65,93 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 downloads
|
'skip_download': True, # m3u8 downloads
|
||||||
}
|
},
|
||||||
|
'skip': 'This video is no longer available.',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
_VIDEO_ID_REGEXES = (
|
|
||||||
r'"eVar42"\s*:\s*(\d+)',
|
def _graphql_call(self, query_template, object_type, object_id):
|
||||||
r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
|
find_object = 'find' + object_type
|
||||||
r'"id_not"\s*:\s*(\d+)'
|
return self._download_json(
|
||||||
)
|
'http://teamcoco.com/graphql/', object_id, data=json.dumps({
|
||||||
|
'query': query_template % (find_object, object_id)
|
||||||
|
}))['data'][find_object]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
display_id = mobj.group('display_id')
|
response = self._graphql_call('''{
|
||||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
%s(slug: "video/%s") {
|
||||||
if 'src=expired' in urlh.geturl():
|
... on RecordSlug {
|
||||||
raise ExtractorError('This video is expired.', expected=True)
|
record {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
teaser
|
||||||
|
publishOn
|
||||||
|
thumb {
|
||||||
|
preview
|
||||||
|
}
|
||||||
|
tags {
|
||||||
|
name
|
||||||
|
}
|
||||||
|
duration
|
||||||
|
}
|
||||||
|
}
|
||||||
|
... on NotFoundSlug {
|
||||||
|
status
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''', 'Slug', display_id)
|
||||||
|
if response.get('status'):
|
||||||
|
raise ExtractorError('This video is no longer available.', expected=True)
|
||||||
|
|
||||||
video_id = mobj.group('video_id')
|
record = response['record']
|
||||||
if not video_id:
|
video_id = record['id']
|
||||||
video_id = self._html_search_regex(
|
|
||||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
|
||||||
|
|
||||||
data = None
|
srcs = self._graphql_call('''{
|
||||||
|
%s(id: "%s") {
|
||||||
preload_codes = self._html_search_regex(
|
src
|
||||||
r'(function.+)setTimeout\(function\(\)\{playlist',
|
}
|
||||||
webpage, 'preload codes')
|
}''', 'RecordVideoSource', video_id)['src']
|
||||||
base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes)
|
|
||||||
base64_fragments.remove('init')
|
|
||||||
|
|
||||||
def _check_sequence(cur_fragments):
|
|
||||||
if not cur_fragments:
|
|
||||||
return
|
|
||||||
for i in range(len(cur_fragments)):
|
|
||||||
cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
|
|
||||||
try:
|
|
||||||
raw_data = compat_b64decode(cur_sequence)
|
|
||||||
if compat_ord(raw_data[0]) == compat_ord('{'):
|
|
||||||
return json.loads(raw_data.decode('utf-8'))
|
|
||||||
except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):
|
|
||||||
continue
|
|
||||||
|
|
||||||
def _check_data():
|
|
||||||
for i in range(len(base64_fragments) + 1):
|
|
||||||
for j in range(i, len(base64_fragments) + 1):
|
|
||||||
data = _check_sequence(base64_fragments[:i] + base64_fragments[j:])
|
|
||||||
if data:
|
|
||||||
return data
|
|
||||||
|
|
||||||
self.to_screen('Try to compute possible data sequence. This may take some time.')
|
|
||||||
data = _check_data()
|
|
||||||
|
|
||||||
if not data:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Preload information could not be extracted', expected=True)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
|
||||||
for filed in data['files']:
|
for format_id, src in srcs.items():
|
||||||
if determine_ext(filed['url']) == 'm3u8':
|
if not isinstance(src, dict):
|
||||||
# compat_urllib_parse.urljoin does not work here
|
|
||||||
if filed['url'].startswith('/'):
|
|
||||||
m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
|
|
||||||
else:
|
|
||||||
m3u8_url = filed['url']
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
m3u8_url, video_id, ext='mp4')
|
|
||||||
for m3u8_format in m3u8_formats:
|
|
||||||
if m3u8_format not in formats:
|
|
||||||
formats.append(m3u8_format)
|
|
||||||
elif determine_ext(filed['url']) == 'f4m':
|
|
||||||
# TODO Correct f4m extraction
|
|
||||||
continue
|
continue
|
||||||
|
src_url = src.get('src')
|
||||||
|
if not src_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
|
||||||
|
if format_id == 'hls' or ext == 'm3u8':
|
||||||
|
# compat_urllib_parse.urljoin does not work here
|
||||||
|
if src_url.startswith('/'):
|
||||||
|
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
if filed['url'].startswith('/mp4:protected/'):
|
if src_url.startswith('/mp4:protected/'):
|
||||||
# TODO Correct extraction for these files
|
# TODO Correct extraction for these files
|
||||||
continue
|
continue
|
||||||
m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
|
tbr = int_or_none(self._search_regex(
|
||||||
if m_format is not None:
|
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
|
||||||
format_id = m_format.group(1)
|
|
||||||
else:
|
|
||||||
format_id = filed['bitrate']
|
|
||||||
tbr = (
|
|
||||||
int(filed['bitrate'])
|
|
||||||
if filed['bitrate'].isdigit()
|
|
||||||
else None)
|
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': filed['url'],
|
'url': src_url,
|
||||||
'ext': 'mp4',
|
'ext': ext,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': get_quality(format_id),
|
'quality': get_quality(format_id),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': data['title'],
|
'title': record['title'],
|
||||||
'thumbnail': data.get('thumb', {}).get('href'),
|
'thumbnail': record.get('thumb', {}).get('preview'),
|
||||||
'description': data.get('teaser'),
|
'description': record.get('teaser'),
|
||||||
'duration': data.get('duration'),
|
'duration': parse_duration(record.get('duration')),
|
||||||
'age_limit': self._family_friendly_search(webpage),
|
'timestamp': parse_iso8601(record.get('publishOn')),
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ import random
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_kwargs,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
@ -16,11 +17,14 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
qualities,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
@ -45,10 +49,11 @@ class TwitchBaseIE(InfoExtractor):
|
|||||||
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
|
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
def _call_api(self, path, item_id, note):
|
def _call_api(self, path, item_id, *args, **kwargs):
|
||||||
|
kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'%s/%s' % (self._API_BASE, path), item_id, note,
|
'%s/%s' % (self._API_BASE, path), item_id,
|
||||||
headers={'Client-ID': self._CLIENT_ID})
|
*args, **compat_kwargs(kwargs))
|
||||||
self._handle_error(response)
|
self._handle_error(response)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@ -622,21 +627,23 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class TwitchClipsIE(InfoExtractor):
|
class TwitchClipsIE(TwitchBaseIE):
|
||||||
IE_NAME = 'twitch:clips'
|
IE_NAME = 'twitch:clips'
|
||||||
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
|
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
|
||||||
'md5': '761769e1eafce0ffebfb4089cb3847cd',
|
'md5': '761769e1eafce0ffebfb4089cb3847cd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AggressiveCobraPoooound',
|
'id': '42850523',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'EA Play 2016 Live from the Novo Theatre',
|
'title': 'EA Play 2016 Live from the Novo Theatre',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'timestamp': 1465767393,
|
||||||
|
'upload_date': '20160612',
|
||||||
'creator': 'EA',
|
'creator': 'EA',
|
||||||
'uploader': 'stereotype_',
|
'uploader': 'stereotype_',
|
||||||
'uploader_id': 'stereotype_',
|
'uploader_id': '43566419',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# multiple formats
|
# multiple formats
|
||||||
@ -647,34 +654,63 @@ class TwitchClipsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
status = self._download_json(
|
||||||
|
'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
clip = self._parse_json(
|
formats = []
|
||||||
self._search_regex(
|
|
||||||
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
|
|
||||||
video_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
|
for option in status['quality_options']:
|
||||||
|
if not isinstance(option, dict):
|
||||||
formats = [{
|
continue
|
||||||
'url': option['source'],
|
source = option.get('source')
|
||||||
|
if not source or not isinstance(source, compat_str):
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': source,
|
||||||
'format_id': option.get('quality'),
|
'format_id': option.get('quality'),
|
||||||
'height': int_or_none(option.get('quality')),
|
'height': int_or_none(option.get('quality')),
|
||||||
} for option in clip.get('quality_options', []) if option.get('source')]
|
'fps': int_or_none(option.get('frame_rate')),
|
||||||
|
})
|
||||||
if not formats:
|
|
||||||
formats = [{
|
|
||||||
'url': clip['clip_video_url'],
|
|
||||||
}]
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info = {
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
|
|
||||||
'uploader': clip.get('curator_login'),
|
|
||||||
'uploader_id': clip.get('curator_display_name'),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
clip = self._call_api(
|
||||||
|
'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
|
||||||
|
'Accept': 'application/vnd.twitchtv.v5+json',
|
||||||
|
})
|
||||||
|
|
||||||
|
if clip:
|
||||||
|
quality_key = qualities(('tiny', 'small', 'medium'))
|
||||||
|
thumbnails = []
|
||||||
|
thumbnails_dict = clip.get('thumbnails')
|
||||||
|
if isinstance(thumbnails_dict, dict):
|
||||||
|
for thumbnail_id, thumbnail_url in thumbnails_dict.items():
|
||||||
|
thumbnails.append({
|
||||||
|
'id': thumbnail_id,
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'preference': quality_key(thumbnail_id),
|
||||||
|
})
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'id': clip.get('tracking_id') or video_id,
|
||||||
|
'title': clip.get('title') or video_id,
|
||||||
|
'duration': float_or_none(clip.get('duration')),
|
||||||
|
'views': int_or_none(clip.get('views')),
|
||||||
|
'timestamp': unified_timestamp(clip.get('created_at')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
|
||||||
|
'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
|
||||||
|
'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
info.update({
|
||||||
|
'title': video_id,
|
||||||
|
'id': video_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
@ -105,7 +106,7 @@ class UdemyIE(InfoExtractor):
|
|||||||
% (course_id, lecture_id),
|
% (course_id, lecture_id),
|
||||||
lecture_id, 'Downloading lecture JSON', query={
|
lecture_id, 'Downloading lecture JSON', query={
|
||||||
'fields[lecture]': 'title,description,view_html,asset',
|
'fields[lecture]': 'title,description,view_html,asset',
|
||||||
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
|
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
|
||||||
})
|
})
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
@ -303,9 +304,25 @@ class UdemyIE(InfoExtractor):
|
|||||||
'url': src,
|
'url': src,
|
||||||
})
|
})
|
||||||
|
|
||||||
download_urls = asset.get('download_urls')
|
for url_kind in ('download', 'stream'):
|
||||||
if isinstance(download_urls, dict):
|
urls = asset.get('%s_urls' % url_kind)
|
||||||
extract_formats(download_urls.get('Video'))
|
if isinstance(urls, dict):
|
||||||
|
extract_formats(urls.get('Video'))
|
||||||
|
|
||||||
|
captions = asset.get('captions')
|
||||||
|
if isinstance(captions, list):
|
||||||
|
for cc in captions:
|
||||||
|
if not isinstance(cc, dict):
|
||||||
|
continue
|
||||||
|
cc_url = cc.get('url')
|
||||||
|
if not cc_url or not isinstance(cc_url, compat_str):
|
||||||
|
continue
|
||||||
|
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
|
||||||
|
sub_dict = (automatic_captions if cc.get('source') == 'auto'
|
||||||
|
else subtitles)
|
||||||
|
sub_dict.setdefault(lang or 'en', []).append({
|
||||||
|
'url': cc_url,
|
||||||
|
})
|
||||||
|
|
||||||
view_html = lecture.get('view_html')
|
view_html = lecture.get('view_html')
|
||||||
if view_html:
|
if view_html:
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2018.05.01'
|
__version__ = '2018.05.09'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user