diff --git a/ChangeLog b/ChangeLog index d4f809fc6..d2f17ee06 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,55 @@ +version + +Core +* [utils] Improve str_to_int ++ [downloader/hls] Add ability to override AES decryption key URL (#17521) + +Extractors ++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291) ++ [slideslive] Add support for url and vimeo service names (#23414) +* [slideslive] Fix extraction (#23413) +* [twitch:clips] Fix extraction (#23375) ++ [soundcloud] Add support for token protected embeds (#18954) +* [vk] Improve extraction + * Fix User Videos extraction (#23356) + * Extract all videos for lists with more than 1000 videos (#23356) + + Add support for video albums (#14327, #14492) +- [kontrtube] Remove extractor +- [videopremium] Remove extractor +- [musicplayon] Remove extractor (#9225) ++ [ufctv] Add support for ufcfightpass.imgdge.com and + ufcfightpass.imggaming.com (#23343) ++ [twitch] Extract m3u8 formats frame rate (#23333) ++ [imggaming] Add support for playlists and extract subtitles ++ [ufcarabia] Add support for UFC Arabia (#23312) +* [ufctv] Fix extraction +* [yahoo] Fix gyao brightcove player id (#23303) +* [vzaar] Override AES decryption key URL (#17521) ++ [vzaar] Add support for AES HLS manifests (#17521, #23299) +* [nrl] Fix extraction +* [teachingchannel] Fix extraction +* [nintendo] Fix extraction and partially add support for Nintendo Direct + videos (#4592) ++ [ooyala] Add better fallback values for domain and streams variables ++ [youtube] Add support for youtubekids.com (#23272) +* [tv2] Detect DRM protection ++ [tv2] Add support for katsomo.fi and mtv.fi (#10543) +* [tv2] Fix tv2.no article extraction +* [msn] Improve extraction + + Add support for YouTube and NBCSports embeds + + Add support for articles with multiple videos + * Improve AOL embed support + * Improve format extraction +* [abcotvs] Relax URL regular expression and improve metadata extraction + (#18014) +* [channel9] Reduce response size +* 
[adobetv] Improve extraction + * Use OnDemandPagedList for list extractors + * Reduce show extraction requests + * Extract original video format and subtitles + + Add support for adobe tv embeds + + version 2019.11.28 Core @@ -583,7 +635,7 @@ Extractors version 2019.04.17 Extractors -* [openload] Randomize User-Agent (closes #20688) +* [openload] Randomize User-Agent (#20688) + [openload] Add support for oladblock domains (#20471) * [adn] Fix subtitle extraction (#12724) + [aol] Add support for localized websites @@ -1148,7 +1200,7 @@ Extractors + [youtube] Extract channel meta fields (#9676, #12939) * [porntube] Fix extraction (#17541) * [asiancrush] Fix extraction (#15630) -+ [twitch:clips] Extend URL regular expression (closes #17559) ++ [twitch:clips] Extend URL regular expression (#17559) + [vzaar] Add support for HLS * [tube8] Fix metadata extraction (#17520) * [eporner] Extract JSON-LD (#17519) diff --git a/test/test_utils.py b/test/test_utils.py index fed94a906..0896f4150 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -500,6 +500,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) + # Python 3 has no long + if sys.version_info < (3, 0): + eval('self.assertEqual(str_to_int(123456L), 123456)') + self.assertEqual(str_to_int('noninteger'), None) + self.assertEqual(str_to_int([]), None) def test_url_basename(self): self.assertEqual(url_basename('http://foo.de/'), '') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fd93730fa..376d07727 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1168,7 +1168,10 @@ from .tv2 import ( TV2ArticleIE, KatsomoIE, ) -from .tv2dk import TV2DKIE +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE diff --git 
a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py index ed84322c5..d9ea76831 100644 --- a/youtube_dl/extractor/slideslive.py +++ b/youtube_dl/extractor/slideslive.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import smuggle_url class SlidesLiveIE(InfoExtractor): @@ -14,9 +14,9 @@ class SlidesLiveIE(InfoExtractor): 'info_dict': { 'id': 'LMtgR8ba0b0', 'ext': 'mp4', - 'title': '38902413: external video', - 'description': '3890241320170925-9-1yd6ech.mp4', - 'uploader': 'SlidesLive Administrator', + 'title': 'GCC IA16 backend', + 'description': 'Watch full version of this video at https://slideslive.com/38902413.', + 'uploader': 'SlidesLive Videos - A', 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'upload_date': '20170925', } @@ -24,16 +24,38 @@ class SlidesLiveIE(InfoExtractor): # video_service_name = youtube 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'only_matching': True, + }, { + # video_service_name = url + 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', + 'only_matching': True, + }, { + # video_service_name = vimeo + 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - url, video_id, headers={'Accept': 'application/json'}) + 'https://ben.slideslive.com/player/' + video_id, video_id) service_name = video_data['video_service_name'].lower() - if service_name == 'youtube': - yt_video_id = video_data['video_service_id'] - return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + assert service_name in ('url', 'vimeo', 'youtube') + service_id = video_data['video_service_id'] + info = { + 'id': video_id, + 'thumbnail': video_data.get('thumbnail'), + 'url': service_id, + } + if service_name == 
'url': + info['title'] = video_data['title'] else: - raise ExtractorError( - 'Unsupported service name: {0}'.format(service_name), expected=True) + info.update({ + '_type': 'url_transparent', + 'ie_key': service_name.capitalize(), + 'title': video_data.get('title'), + }) + if service_name == 'vimeo': + info['url'] = smuggle_url( + 'https://player.vimeo.com/video/' + service_id, + {'http_headers': {'Referer': url}}) + return info diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py index eb39424df..611fdc0c6 100644 --- a/youtube_dl/extractor/tv2dk.py +++ b/youtube_dl/extractor/tv2dk.py @@ -1,10 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .common import InfoExtractor -from ..utils import extract_attributes +from ..utils import ( + determine_ext, + extract_attributes, + js_to_json, + url_or_none, +) class TV2DKIE(InfoExtractor): @@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor): 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', video_id=kaltura_id)) return self.playlist_result(entries) + + +class TV2DKBornholmPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P\d+)' + _TEST = { + 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021', + 'info_dict': { + 'id': '781021', + 'ext': 'mp4', + 'title': '12Nyheder-27.11.19', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, + data=json.dumps({ + 'playlist_id': video_id, + 'serienavn': '', + }).encode(), headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'Content-Type': 'application/json; charset=UTF-8', + })['d'] + + # TODO: generalize flowplayer + title = self._search_regex( + r'title\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', video, 'title', + group='value') + sources = self._parse_json(self._search_regex( + r'(?s)sources:\s*(\[.+?\]),', video, 
'sources'), + video_id, js_to_json) + + formats = [] + srcs = set() + for source in sources: + src = url_or_none(source.get('src')) + if not src: + continue + if src in srcs: + continue + srcs.add(src) + ext = determine_ext(src) + src_type = source.get('type') + if src_type == 'application/x-mpegurl' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif src_type == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 1f3df3112..a8c2502af 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -17,12 +17,10 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, - float_or_none, int_or_none, orderedSet, parse_duration, parse_iso8601, - qualities, try_get, unified_timestamp, update_url_query, @@ -676,63 +674,81 @@ class TwitchClipsIE(TwitchBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - status = self._download_json( - 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id, - video_id) + clip = self._download_json( + 'https://gql.twitch.tv/gql', video_id, data=json.dumps({ + 'query': '''{ + clip(slug: "%s") { + broadcaster { + displayName + } + createdAt + curator { + displayName + id + } + durationSeconds + id + tiny: thumbnailURL(width: 86, height: 45) + small: thumbnailURL(width: 260, height: 147) + medium: thumbnailURL(width: 480, height: 272) + title + videoQualities { + frameRate + quality + sourceURL + } + viewCount + } +}''' % video_id, + }).encode(), headers={ + 'Client-ID': self._CLIENT_ID, + })['data']['clip'] + + if not clip: + raise ExtractorError( + 'This clip is no 
longer available', expected=True) formats = [] - - for option in status['quality_options']: + for option in clip.get('videoQualities', []): if not isinstance(option, dict): continue - source = url_or_none(option.get('source')) + source = url_or_none(option.get('sourceURL')) if not source: continue formats.append({ 'url': source, 'format_id': option.get('quality'), 'height': int_or_none(option.get('quality')), - 'fps': int_or_none(option.get('frame_rate')), + 'fps': int_or_none(option.get('frameRate')), }) - self._sort_formats(formats) - info = { + thumbnails = [] + for thumbnail_id in ('tiny', 'small', 'medium'): + thumbnail_url = clip.get(thumbnail_id) + if not thumbnail_url: + continue + thumb = { + 'id': thumbnail_id, + 'url': thumbnail_url, + } + mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url) + if mobj: + thumb.update({ + 'height': int(mobj.group(2)), + 'width': int(mobj.group(1)), + }) + thumbnails.append(thumb) + + return { + 'id': clip.get('id') or video_id, + 'title': clip.get('title') or video_id, 'formats': formats, + 'duration': int_or_none(clip.get('durationSeconds')), + 'views': int_or_none(clip.get('viewCount')), + 'timestamp': unified_timestamp(clip.get('createdAt')), + 'thumbnails': thumbnails, + 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str), + 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str), + 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), } - - clip = self._call_api( - 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={ - 'Accept': 'application/vnd.twitchtv.v5+json', - }) - - if clip: - quality_key = qualities(('tiny', 'small', 'medium')) - thumbnails = [] - thumbnails_dict = clip.get('thumbnails') - if isinstance(thumbnails_dict, dict): - for thumbnail_id, thumbnail_url in thumbnails_dict.items(): - thumbnails.append({ - 'id': thumbnail_id, - 'url': thumbnail_url, - 'preference': quality_key(thumbnail_id), - }) - - info.update({ - 'id': 
clip.get('tracking_id') or video_id, - 'title': clip.get('title') or video_id, - 'duration': float_or_none(clip.get('duration')), - 'views': int_or_none(clip.get('views')), - 'timestamp': unified_timestamp(clip.get('created_at')), - 'thumbnails': thumbnails, - 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str), - 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str), - 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), - }) - else: - info.update({ - 'title': video_id, - 'id': video_id, - }) - - return info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 328f037a8..f6204692a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -46,6 +46,7 @@ from .compat import ( compat_html_entities, compat_html_entities_html5, compat_http_client, + compat_integer_types, compat_kwargs, compat_os_name, compat_parse_qs, @@ -3519,10 +3520,11 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if not isinstance(int_str, compat_str): + if isinstance(int_str, compat_integer_types): return int_str - int_str = re.sub(r'[,\.\+]', '', int_str) - return int(int_str) + elif isinstance(int_str, compat_str): + int_str = re.sub(r'[,\.\+]', '', int_str) + return int_or_none(int_str) def float_or_none(v, scale=1, invscale=1, default=None):