From 9b5aead6aa8ad82a5eecd2bc26c0e94399e92ca7 Mon Sep 17 00:00:00 2001 From: Timmy Date: Sat, 14 Apr 2018 17:04:42 +0200 Subject: [PATCH 01/47] [vine:user] Fix extraction (closes #15514) --- youtube_dl/extractor/vine.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 46950d3a1..08ddffa66 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import itertools from .common import InfoExtractor from ..utils import ( @@ -116,14 +115,14 @@ class VineUserIE(InfoExtractor): _VINE_BASE_URL = 'https://vine.co/' _TESTS = [ { - 'url': 'https://vine.co/Visa', + 'url': 'https://vine.co/itsruthb', 'info_dict': { - 'id': 'Visa', + 'id': 'itsruthb', }, - 'playlist_mincount': 46, + 'playlist_mincount': 611, }, { - 'url': 'https://vine.co/u/941705360593584128', + 'url': 'https://vine.co/u/942914934646415360', 'only_matching': True, }, ] @@ -139,16 +138,10 @@ class VineUserIE(InfoExtractor): profile_url, user, note='Downloading user profile data') user_id = profile_data['data']['userId'] - timeline_data = [] - for pagenum in itertools.count(1): - timeline_url = '%sapi/timelines/users/%s?page=%s&size=100' % ( - self._VINE_BASE_URL, user_id, pagenum) - timeline_page = self._download_json( - timeline_url, user, note='Downloading page %d' % pagenum) - timeline_data.extend(timeline_page['data']['records']) - if timeline_page['data']['nextPage'] is None: - break - + user_archive = self._download_json( + 'https://archive.vine.co/profiles/%s.json' % user_id, user_id) + posts = user_archive['posts'] entries = [ - self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data] + self.url_result('https://vine.co/v/%s' % post_id, 'Vine') + for post_id in posts] return self.playlist_result(entries, user) From 8e41c9ad01b6deda96c29f685c4d8861b8759ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Apr 2018 22:43:25 +0700 Subject: [PATCH 02/47] [vine:user] Improve extraction (closes #16190) --- youtube_dl/extractor/vine.py | 45 +++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 08ddffa66..80b896b56 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, @@ -111,21 +112,24 @@ class VineIE(InfoExtractor): class VineUserIE(InfoExtractor): IE_NAME = 'vine:user' - _VALID_URL = r'(?:https?://)?vine\.co/(?Pu/)?(?P[^/]+)/?(\?.*)?$' + _VALID_URL = r'https?://vine\.co/(?Pu/)?(?P[^/]+)' _VINE_BASE_URL = 'https://vine.co/' - _TESTS = [ - { - 'url': 'https://vine.co/itsruthb', - 'info_dict': { - 'id': 'itsruthb', - }, - 'playlist_mincount': 611, + _TESTS = [{ + 'url': 'https://vine.co/itsruthb', + 'info_dict': { + 'id': 'itsruthb', + 'title': 'Ruth B', + 'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland', }, - { - 'url': 'https://vine.co/u/942914934646415360', - 'only_matching': True, - }, - ] + 'playlist_mincount': 611, + }, { + 'url': 'https://vine.co/u/942914934646415360', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url) def _real_extract(self, url): mobj = 
re.match(self._VALID_URL, url) @@ -137,11 +141,14 @@ class VineUserIE(InfoExtractor): profile_data = self._download_json( profile_url, user, note='Downloading user profile data') - user_id = profile_data['data']['userId'] - user_archive = self._download_json( + data = profile_data['data'] + user_id = data.get('userId') or data['userIdStr'] + profile = self._download_json( 'https://archive.vine.co/profiles/%s.json' % user_id, user_id) - posts = user_archive['posts'] entries = [ - self.url_result('https://vine.co/v/%s' % post_id, 'Vine') - for post_id in posts] - return self.playlist_result(entries, user) + self.url_result( + 'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id) + for post_id in profile['posts'] + if post_id and isinstance(post_id, compat_str)] + return self.playlist_result( + entries, user, profile.get('username'), profile.get('description')) From d6166a7602f5b78a4bb552ba0f4b176cbc0a4a03 Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Tue, 21 Mar 2017 00:49:31 +0200 Subject: [PATCH 03/47] [picarto] Add extractor --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/picarto.py | 87 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100755 youtube_dl/extractor/picarto.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c9f60114d..d83e93dec 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -815,6 +815,10 @@ from .periscope import ( from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .picarto import ( + PicartoVodIE, + PicartoIE, +) from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py new file mode 100755 index 000000000..1d6f714ed --- /dev/null +++ b/youtube_dl/extractor/picarto.py @@ -0,0 +1,87 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError, js_to_json, urlencode_postdata + + +class PicartoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)[^/]*$' + _TEST = { + 'url': 'https://picarto.tv/Setz', + 'info_dict': { + 'id': 'Setz', + 'ext': 'mp4', + 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'timestamp': int, + 'is_live': True + }, + 'params': { + 'skip_download': True + } + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + stream_page = self._download_webpage(url, channel_id) + + if 'This channel does not exist.' 
in stream_page: + raise ExtractorError('Channel does not exist', expected=True) + + player_settings_js = self._html_search_regex( + r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings') + player_settings = self._parse_json(player_settings_js, channel_id, + transform_source=js_to_json) + if not player_settings.get('online'): + raise ExtractorError('Stream is offline', expected=True) + + cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id, + data=urlencode_postdata({'loadbalancinginfo': channel_id}), + note='Fetching load balancer info') + edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0] + + formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id), + channel_id, 'mp4') + formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)}) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'formats': formats, + 'ext': 'mp4', + 'title': self._live_title(channel_id), + 'is_live': True, + 'thumbnail': player_settings.get('vodThumb'), + 'age_limit': 18 if player_settings.get('mature') else None, + } + + +class PicartoVodIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P[a-zA-Z0-9_\-\.]+).flv' + _TEST = { + 'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv', + 'md5': '80765b67813053ff31d4df2bd5e900ce', + 'info_dict': { + 'id': 'Carrot_2018.01.11.07.55.12', + 'ext': 'mp4', + 'title': 'Carrot_2018.01.11.07.55.12', + 'thumbnail': r're:^https?://.*\.jpg$' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)', + webpage, video_id) + vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json) + + return { + 'id': video_id, + 'title': video_id, + 'ext': 'mp4', + 'protocol': 'm3u8', + 'url': vod_info['vod'], + 'thumbnail': vod_info.get('vodThumb'), + } From a42839e548d81ae20e5164ae690075d2c423477e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 00:31:25 +0700 Subject: [PATCH 04/47] [picarto] Improve extraction (closes #6205, closes #12514, closes #15276, closes #15551) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/picarto.py | 152 ++++++++++++++++++++++------- 2 files changed, 116 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d83e93dec..3570fa165 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -816,8 +816,8 @@ from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .picarto import ( - PicartoVodIE, PicartoIE, + PicartoVodIE, ) from .piksel import PikselIE from .pinkbike import PinkbikeIE diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py index 1d6f714ed..2366dfb34 100755 --- a/youtube_dl/extractor/picarto.py +++ b/youtube_dl/extractor/picarto.py @@ -1,12 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals +import time + from .common import InfoExtractor -from ..utils import ExtractorError, js_to_json, urlencode_postdata +from ..compat import compat_str +from ..utils import ( + ExtractorError, + js_to_json, + try_get, + update_url_query, + urlencode_postdata, +) class PicartoIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)[^/]*$' + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)' _TEST = { 'url': 'https://picarto.tv/Setz', 'info_dict': { @@ -16,72 +25,141 @@ class PicartoIE(InfoExtractor): 'timestamp': int, 'is_live': True }, - 'params': { - 'skip_download': True - } + 'skip': 'Stream is offline', } + @classmethod + def suitable(cls, url): + return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) + def _real_extract(self, url): channel_id = self._match_id(url) stream_page = self._download_webpage(url, channel_id) - if 'This channel does not exist.' in stream_page: - raise ExtractorError('Channel does not exist', expected=True) + if '>This channel does not exist' in stream_page: + raise ExtractorError( + 'Channel %s does not exist' % channel_id, expected=True) - player_settings_js = self._html_search_regex( - r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings') - player_settings = self._parse_json(player_settings_js, channel_id, - transform_source=js_to_json) - if not player_settings.get('online'): + player = self._parse_json( + self._search_regex( + r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page, + 'player settings'), + channel_id, transform_source=js_to_json) + + if player.get('online') is False: raise ExtractorError('Stream is offline', expected=True) - cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id, + cdn_data = self._download_json( + 'https://picarto.tv/process/channel', channel_id, data=urlencode_postdata({'loadbalancinginfo': channel_id}), - note='Fetching load balancer info') - edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0] + note='Downloading load balancing info') - formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id), - channel_id, 'mp4') - formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)}) + def get_event(key): + return try_get(player, lambda x: x['event'][key], compat_str) or '' + + params = { + 'token': player.get('token') or '', + 'ticket': get_event('ticket'), + 'con': int(time.time() * 1000), + 'type': get_event('ticket'), + 'scope': get_event('scope'), + } + + prefered_edge = cdn_data.get('preferedEdge') + default_tech = player.get('defaultTech') + + formats = [] + + for edge in cdn_data['edges']: + edge_ep = edge.get('ep') + if not edge_ep or not isinstance(edge_ep, compat_str): + continue + edge_id = edge.get('id') + for tech in cdn_data['techs']: + tech_label = tech.get('label') + tech_type = tech.get('type') + preference = 0 + if edge_id == prefered_edge: + preference += 1 + if tech_type == default_tech: + preference += 1 + format_id = [] + if edge_id: + format_id.append(edge_id) + if tech_type == 'application/x-mpegurl' or tech_label == 'HLS': + format_id.append('hls') + formats.extend(self._extract_m3u8_formats( + update_url_query( + 'https://%s/hls/%s/index.m3u8' + % (edge_ep, channel_id), params), + channel_id, 'mp4', preference=preference, + m3u8_id='-'.join(format_id), fatal=False)) + continue + elif tech_type == 'video/mp4' or tech_label == 'MP4': + format_id.append('mp4') + formats.append({ + 'url': update_url_query( + 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id), + params), + 'format_id': '-'.join(format_id), + 'preference': preference, + }) + else: + # rtmp format does not seem to work + continue self._sort_formats(formats) + mature = player.get('mature') + if mature is None: + age_limit = None + else: + 
age_limit = 18 if mature is True else 0 + return { 'id': channel_id, - 'formats': formats, - 'ext': 'mp4', 'title': self._live_title(channel_id), 'is_live': True, - 'thumbnail': player_settings.get('vodThumb'), - 'age_limit': 18 if player_settings.get('mature') else None, + 'thumbnail': player.get('vodThumb'), + 'age_limit': age_limit, + 'formats': formats, } class PicartoVodIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P[a-zA-Z0-9_\-\.]+).flv' - _TEST = { - 'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv', - 'md5': '80765b67813053ff31d4df2bd5e900ce', + _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv', + 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca', 'info_dict': { - 'id': 'Carrot_2018.01.11.07.55.12', + 'id': 'ArtofZod_2017.12.12.00.13.23.flv', 'ext': 'mp4', - 'title': 'Carrot_2018.01.11.07.55.12', - 'thumbnail': r're:^https?://.*\.jpg$' - } - } + 'title': 'ArtofZod_2017.12.12.00.13.23.flv', + 'thumbnail': r're:^https?://.*\.jpg' + }, + }, { + 'url': 'https://picarto.tv/videopopout/Plague', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)', - webpage, video_id) - vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json) + vod_info = self._parse_json( + self._search_regex( + r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage, + video_id), + video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats( + vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) return { 'id': video_id, 'title': video_id, - 'ext': 'mp4', - 'protocol': 'm3u8', - 'url': vod_info['vod'], 'thumbnail': vod_info.get('vodThumb'), + 'formats': formats, } From c07cb68e7974a2ecd94f4101e6f094414df16e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 00:54:21 +0700 Subject: [PATCH 05/47] [smotri:broadcast] Fix extraction (closes #16180) --- youtube_dl/extractor/smotri.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 370fa8879..45995f30f 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -310,6 +310,7 @@ class SmotriBroadcastIE(InfoExtractor): IE_DESC = 'Smotri.com broadcasts' IE_NAME = 'smotri:broadcast' _VALID_URL = r'https?://(?:www\.)?(?Psmotri\.com/live/(?P[^/]+))/?.*' + _NETRC_MACHINE = 'smotri' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -352,17 +353,18 @@ class SmotriBroadcastIE(InfoExtractor): adult_content = False ticket = self._html_search_regex( - r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)", - broadcast_page, 'broadcast ticket') + (r'data-user-file=(["\'])(?P(?!\1).+)\1', + r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P[^']+)'\)"), + broadcast_page, 'broadcast ticket', group='ticket') - url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket + broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket broadcast_password = self._downloader.params.get('videopassword') if broadcast_password: - url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() + broadcast_url += '&pass=%s' % 
hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() broadcast_json_page = self._download_webpage( - url, broadcast_id, 'Downloading broadcast JSON') + broadcast_url, broadcast_id, 'Downloading broadcast JSON') try: broadcast_json = json.loads(broadcast_json_page) From 0e6ccb3905cb86c53a91af4c9119e2fd102019d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 00:56:05 +0700 Subject: [PATCH 06/47] [ChangeLog] Actualize [ci skip] --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4385c4091..12bda4951 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version + +Extractors +* [smotri:broadcast] Fix extraction (#16180) ++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551) +* [vine:user] Fix extraction (#15514, #16190) +* [pornhub] Relax URL regular expression (#16165) +* [cbc:watch] Re-acquire device token when expired (#16160) ++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157) ++ [instagram:user] Add request signing (#16119) ++ [twitch] Add support for mobile URLs (#16146) + + version 2018.04.09 Core From bdf7ba6f3a626b4c873257091d0771e54bd02dfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 01:07:21 +0700 Subject: [PATCH 07/47] Set chmod 644 for all extractors --- youtube_dl/extractor/americastestkitchen.py | 0 youtube_dl/extractor/cda.py | 0 youtube_dl/extractor/joj.py | 0 youtube_dl/extractor/picarto.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 youtube_dl/extractor/americastestkitchen.py mode change 100755 => 100644 youtube_dl/extractor/cda.py mode change 100755 => 100644 youtube_dl/extractor/joj.py mode change 100755 => 100644 youtube_dl/extractor/picarto.py diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py old mode 100755 new mode 100644 From 3c92fd1cd5b5ced11f03ebe64104457c21cd69ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 01:09:18 +0700 Subject: [PATCH 08/47] release 2018.04.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ed622afd1..69f996179 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.04.09 +[debug] youtube-dl version 2018.04.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 12bda4951..185fa1753 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.04.16 Extractors * [smotri:broadcast] Fix extraction (#16180) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1c13199d4..715d16cfe 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -628,6 +628,8 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** + - **Picarto** + - **PicartoVod** - **Piksel** - **Pinkbike** - **Pladform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 307d6041a..5aefdd0a2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.04.09' +__version__ = '2018.04.16' From 522d6b5c961f584055463f8c69de864ec075083b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 16 Apr 2018 07:48:36 +0100 Subject: [PATCH 09/47] [cbs] skip DRM asset types(fixes #16104) --- youtube_dl/extractor/cbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index f425562ab..1799d63ea 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -65,7 +65,7 @@ class CBSIE(CBSBaseIE): last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') - if not asset_type or asset_type in asset_types: + if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'): continue asset_types.append(asset_type) query = { From 238d42cf5d4b1a95ba42bf56dcb1bf559ac11c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Apr 2018 22:37:50 +0700 Subject: [PATCH 10/47] [instagram:user] Fix extraction (closes #16119) --- youtube_dl/extractor/instagram.py | 49 ++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8da1d5f2f..5cea37d92 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -6,11 +6,16 @@ import json import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_HTTPError, +) from ..utils import ( + ExtractorError, get_element_by_attribute, int_or_none, lowercase_escape, + std_headers, try_get, ) @@ -239,6 +244,8 @@ class InstagramUserIE(InfoExtractor): } } + _gis_tmpl = None + def _entries(self, data): def get_count(suffix): return int_or_none(try_get( @@ -257,16 +264,36 @@ class InstagramUserIE(InfoExtractor): 'first': 100, 'after': cursor, }) 
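# A minimal sketch of the signing scheme the rewritten request code below
# relies on (the standalone helper and its name are illustrative only; the
# extractor inlines this logic): the X-Instagram-GIS header is the MD5 hex
# digest of a "gis template" joined with the GraphQL variables JSON by a
# colon, where the template is one of the candidates tried below (rhx_gis
# alone, an empty string, rhx_gis plus the CSRF token, or rhx_gis plus the
# User-Agent).
import hashlib

def _instagram_gis_sketch(gis_tmpl, variables):
    # variables is the JSON string sent as the GraphQL "variables" parameter
    return hashlib.md5(
        ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest()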
- s = '%s:%s:%s' % (rhx_gis, csrf_token, variables) - media = self._download_json( - 'https://www.instagram.com/graphql/query/', uploader_id, - 'Downloading JSON page %d' % page_num, headers={ - 'X-Requested-With': 'XMLHttpRequest', - 'X-Instagram-GIS': hashlib.md5(s.encode('utf-8')).hexdigest(), - }, query={ - 'query_hash': '472f257a40c653c64c666ce877d59d2b', - 'variables': variables, - })['data']['user']['edge_owner_to_timeline_media'] + + if self._gis_tmpl: + gis_tmpls = [self._gis_tmpl] + else: + gis_tmpls = [ + '%s' % rhx_gis, + '', + '%s:%s' % (rhx_gis, csrf_token), + '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), + ] + + for gis_tmpl in gis_tmpls: + try: + media = self._download_json( + 'https://www.instagram.com/graphql/query/', uploader_id, + 'Downloading JSON page %d' % page_num, headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'X-Instagram-GIS': hashlib.md5( + ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(), + }, query={ + 'query_hash': '42323d64886122307be10013ad2dcc44', + 'variables': variables, + })['data']['user']['edge_owner_to_timeline_media'] + self._gis_tmpl = gis_tmpl + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if gis_tmpl != gis_tmpls[-1]: + continue + raise edges = media.get('edges') if not edges or not isinstance(edges, list): From 518d5ba5191e3cc26c81e346ba5117e94db51469 Mon Sep 17 00:00:00 2001 From: Dan Salmon Date: Tue, 17 Apr 2018 12:10:02 -0500 Subject: [PATCH 11/47] Fix some tests --- test/test_subtitles.py | 4 ++-- test/test_youtube_lists.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 1b8de822a..7d57a628e 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -232,7 +232,7 @@ class TestNPOSubtitles(BaseTestSubtitles): class TestMTVSubtitles(BaseTestSubtitles): - url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' + url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' IE = ComedyCentralIE def getInfoDict(self): @@ -243,7 +243,7 @@ class TestMTVSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['en'])) - self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65') + self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961') class TestNRKSubtitles(BaseTestSubtitles): diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 7a33dbf88..c4f0abbea 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -61,7 +61,7 @@ class TestYoutubeLists(unittest.TestCase): dl = FakeYDL() dl.params['extract_flat'] = True ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv') self.assertIsPlaylist(result) for entry in result['entries']: self.assertTrue(entry.get('title')) From e30991f9206f98605ba6c4880ed40ad5556fa0b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Apr 2018 01:24:02 +0700 Subject: [PATCH 12/47] [kaltura] Improve embeds detection (closes #16201) --- youtube_dl/extractor/generic.py | 18 +++++++++++++++++- youtube_dl/extractor/kaltura.py | 6 +++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git 
a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e3cb5c5ce..af1322e00 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1220,7 +1220,7 @@ class GenericIE(InfoExtractor): 'title': '35871', 'timestamp': 1355743100, 'upload_date': '20121217', - 'uploader_id': 'batchUser', + 'uploader_id': 'cplapp@learn360.com', }, 'add_ie': ['Kaltura'], }, @@ -1271,6 +1271,22 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # meta twitter:player + 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/', + 'info_dict': { + 'id': '0_01b42zps', + 'ext': 'mp4', + 'title': 'Main Twerk (Video)', + 'upload_date': '20171208', + 'uploader_id': 'sebastian.salinas@thechive.com', + 'timestamp': 1512713057, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Kaltura'], + }, # referrer protected EaglePlatform embed { 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 562e25f6d..0ea89e4d6 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -135,10 +135,10 @@ class KalturaIE(InfoExtractor): ''', webpage) or re.search( r'''(?xs) - ]+src=(?P["']) - (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P\d+) + <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P["']) + (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P\d+) (?:(?!(?P=q1)).)* - [?&]entry_id=(?P(?:(?!(?P=q1))[^&])+) + [?&;]entry_id=(?P(?:(?!(?P=q1))[^&])+) (?P=q1) ''', webpage) ) From 9b3036bd2e431ed4b037a3df21528a6e9bcb05b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Apr 2018 10:12:24 +0700 Subject: [PATCH 13/47] [instagram:user] Fix extraction (closes #16119) --- youtube_dl/extractor/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 5cea37d92..0c13f54ee 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -261,7 +261,7 @@ class InstagramUserIE(InfoExtractor): for page_num in itertools.count(1): variables = json.dumps({ 'id': uploader_id, - 'first': 100, + 'first': 12, 'after': cursor, }) From b004d9bbf18ee2b6a9b916657c4d6734ff0d0adb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 19 Apr 2018 15:07:50 +0100 Subject: [PATCH 14/47] [cbssports] fix extraction(fixes #16217) --- youtube_dl/extractor/cbssports.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index 3a62c840b..27a243d08 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -4,28 +4,33 @@ from .cbs import CBSBaseIE class CBSSportsIE(CBSBaseIE): - _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', + 'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/', 'info_dict': { - 'id': '708337219968', + 'id': '1214315075735', 'ext': 'mp4', - 'title': 'Ben Simmons the next LeBron? 
Not so fast', - 'description': 'md5:854294f627921baba1f4b9a990d87197', - 'timestamp': 1466293740, - 'upload_date': '20160618', + 'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder', + 'description': 'md5:df6f48622612c2d6bd2e295ddef58def', + 'timestamp': 1524111457, + 'upload_date': '20180419', 'uploader': 'CBSI-NEW', }, 'params': { # m3u8 download 'skip_download': True, } + }, { + 'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/', + 'only_matching': True, }] def _extract_video_info(self, filter_query, video_id): return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id= self._search_regex([r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'], webpage, 'video id') return self._extract_video_info('byId=%s' % video_id, video_id) From d86c5167ae5a1c33451e98d7e05d5b32b6fa3156 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 19 Apr 2018 15:48:03 +0100 Subject: [PATCH 15/47] [nexx] extract new azure urls(closes #16223) --- youtube_dl/extractor/nexx.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index c7029d29e..5e46a75c0 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -230,15 +230,18 @@ class NexxIE(InfoExtractor): azure_locator = stream_data['azureLocator'] - AZURE_URL = 'http://nx%s%02d.akamaized.net/' - - def get_cdn_shield_base(shield_type='', prefix='-p'): + def get_cdn_shield_base(shield_type='', static=False): for secure in ('', 's'): cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) if cdn_shield: return 'http%s://%s' % (secure, cdn_shield) else: - return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', ''))) + if 'fb' in stream_data['azureAccount']: + prefix = 'df' if static else 'f' + else: + prefix = 'd' if static else 'p' + account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', '')) + return 'http://nx-%s%02d.akamaized.net/' % (prefix, account) azure_stream_base = get_cdn_shield_base() is_ml = ',' in language @@ -260,7 +263,7 @@ class NexxIE(InfoExtractor): formats.extend(self._extract_ism_formats( azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) - azure_progressive_base = get_cdn_shield_base('Prog', '-d') + azure_progressive_base = get_cdn_shield_base('Prog', True) azure_file_distribution = stream_data.get('azureFileDistribution') if azure_file_distribution: fds = azure_file_distribution.split(',') From 5a19d231ca8e15d07c2a5ebd3cd6cc46b7596edc Mon Sep 17 00:00:00 2001 From: Douglas Su Date: Thu, 19 Apr 2018 23:21:50 +0800 Subject: [PATCH 16/47] [YoutubeDL] Fix typo in media extension compatibility checker --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fca4999eb..ad3598805 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1853,7 +1853,7 @@ class YoutubeDL(object): def compatible_formats(formats): video, audio = formats # Check extension - video_ext, audio_ext = audio.get('ext'), video.get('ext') + video_ext, audio_ext = video.get('ext'), audio.get('ext') if video_ext and audio_ext: COMPATIBLE_EXTS = ( ('mp3', 'mp4', 
'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), From 1792bc3a06dbdb788d12a1e6a4a8d7072be70edb Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Thu, 19 Apr 2018 10:25:51 -0500 Subject: [PATCH 17/47] [keezmovies] Add support for generic embeds (closes #16134) --- youtube_dl/extractor/keezmovies.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index e83115e2a..d4e6f7ac1 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -20,23 +20,23 @@ from ..utils import ( class KeezMoviesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P[^/]+)-)?(?P\d+)' _TESTS = [{ - 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', - 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0', + 'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681', + 'md5': '2ac69cdb882055f71d82db4311732a1a', 'info_dict': { - 'id': '1214711', - 'display_id': 'petite-asian-lady-mai-playing-in-bathtub', + 'id': '18070681', + 'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money', 'ext': 'mp4', - 'title': 'Petite Asian Lady Mai Playing In Bathtub', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Arab wife want it so bad I see she thirsty and has tiny money.', + 'thumbnail': None, 'view_count': int, 'age_limit': 18, } }, { - 'url': 'http://www.keezmovies.com/video/1214711', + 'url': 'http://www.keezmovies.com/video/18070681', 'only_matching': True, }] - def _extract_info(self, url): + def _extract_info(self, url, fatal=True): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = (mobj.group('display_id') @@ -55,7 +55,7 @@ class KeezMoviesIE(InfoExtractor): encrypted = False def extract_format(format_url, height=None): - if not isinstance(format_url, compat_str) or not format_url.startswith('http'): + if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')): return if format_url in format_urls: return @@ -105,7 +105,11 @@ class KeezMoviesIE(InfoExtractor): raise ExtractorError( 'Video %s is no longer available' % video_id, expected=True) - self._sort_formats(formats) + try: + self._sort_formats(formats) + except ExtractorError: + if fatal: + raise if not title: title = self._html_search_regex( @@ -122,7 +126,9 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - webpage, info = self._extract_info(url) + webpage, info = self._extract_info(url, fatal=False) + if not info['formats']: + return self.url_result(url, 'Generic') info['view_count'] = str_to_int(self._search_regex( r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) return info From d317973284f6d9886bda0bf8215ffb4f060af41d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Apr 2018 22:36:33 +0700 Subject: [PATCH 18/47] [extremetube] Fix metadata extraction --- youtube_dl/extractor/extremetube.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 445f9438d..acd4090fa 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -8,12 +8,12 @@ class ExtremeTubeIE(KeezMoviesIE): _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P[^/#?&]+)' _TESTS = [{ 'url': 
'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', + 'md5': '92feaafa4b58e82f261e5419f39c60cb', 'info_dict': { 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'ext': 'mp4', 'title': 'Music Video 14 british euro brit european cumshots swallow', - 'uploader': 'unknown', + 'uploader': 'anonim', 'view_count': int, 'age_limit': 18, } @@ -36,10 +36,10 @@ class ExtremeTubeIE(KeezMoviesIE): r']+title="([^"]+)"[^>]*>', webpage, 'title') uploader = self._html_search_regex( - r'Uploaded by:\s*\s*(.+?)\s*', + r'Uploaded by:\s*]+>\s*]+>(.+?)', webpage, 'uploader', fatal=False) view_count = str_to_int(self._search_regex( - r'Views:\s*\s*([\d,\.]+)', + r'Views:\s*]+>\s*<[^>]+>([\d,\.]+) Date: Thu, 19 Apr 2018 22:38:31 +0700 Subject: [PATCH 19/47] [mofosex] Fix test --- youtube_dl/extractor/mofosex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index 54716f5c7..1c652813a 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -12,7 +12,7 @@ class MofosexIE(KeezMoviesIE): _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P\d+)/(?P[^/?#&.]+)\.html' _TESTS = [{ 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html', - 'md5': '39a15853632b7b2e5679f92f69b78e91', + 'md5': '558fcdafbb63a87c019218d6e49daf8a', 'info_dict': { 'id': '318131', 'display_id': 'amateur-teen-playing-and-masturbating-318131', From d65a48a0efd2184f7b2fdc823433f568bae56d86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Apr 2018 23:12:13 +0700 Subject: [PATCH 20/47] [nick] Add support for nickjr.nl (closes #16230) --- youtube_dl/extractor/nick.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 090f1acee..256a24d86 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -81,13 +81,23 @@ class NickIE(MTVServicesInfoExtractor): class NickBrIE(MTVServicesInfoExtractor): IE_NAME = 'nickelodeon:br' - _VALID_URL = r'https?://(?P(?:www\.)?nickjr|mundonick\.uol)\.com\.br/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P[^/?#.]+)' + _VALID_URL = r'''(?x) + https?:// + (?: + (?P(?:www\.)?nickjr|mundonick\.uol)\.com\.br| + (?:www\.)?nickjr\.nl + ) + /(?:programas/)?[^/]+/videos/(?:episodios/)?(?P[^/?\#.]+) + ''' _TESTS = [{ 'url': 'http://www.nickjr.com.br/patrulha-canina/videos/210-labirinto-de-pipoca/', 'only_matching': True, }, { 'url': 'http://mundonick.uol.com.br/programas/the-loud-house/videos/muitas-irmas/7ljo9j', 'only_matching': True, + }, { + 'url': 'http://www.nickjr.nl/paw-patrol/videos/311-ge-wol-dig-om-terug-te-zijn/', + 'only_matching': True, }] def _real_extract(self, url): From 4b8588fe0215fb5ea75d4f37402ec51014cb8c53 Mon Sep 17 00:00:00 2001 From: einstein95 Date: Fri, 12 Jan 2018 07:01:02 +1300 Subject: [PATCH 21/47] [rentv] Fix extraction --- youtube_dl/extractor/rentv.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index d338b3a93..df528b09e 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -26,9 +26,20 @@ class RENTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) 
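# For orientation, the player page embeds a JSON config roughly of this shape
# (an illustrative example inferred from the parsing code below, not an
# official schema); the rewritten extractor reads 'title', 'description',
# 'image', 'duration', 'date' and the 'src' list from it:
#
#   config = {
#       'title': 'Video title',
#       'description': '...',
#       'image': 'http://example.invalid/thumbnail.jpg',
#       'duration': 300,
#       'date': 1472230800,
#       'src': [{'src': 'http://example.invalid/video/index.m3u8'}],
#   }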
- jw_config = self._parse_json(self._search_regex( - r'config\s*=\s*({.+});', webpage, 'jw config'), video_id) - return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls') + config = self._parse_json(self._search_regex( + r'config\s*=\s*({.+});', webpage, 'config'), video_id) + formats = [] + for video in config.get('src', ''): + formats.append({ + 'url': video.get('src', '') + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'formats': formats, + 'title': config.get('title', ''), + 'thumbnail': config.get('image', '') + } class RENTVArticleIE(InfoExtractor): From a693386df1957ba03cbf5156a65dd18b2c37ac42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Apr 2018 23:22:10 +0700 Subject: [PATCH 22/47] [rentv] Improve extraction (closes #15227) --- youtube_dl/extractor/rentv.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index df528b09e..8bcf87126 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, +) class RENTVIE(InfoExtractor): @@ -13,7 +17,9 @@ class RENTVIE(InfoExtractor): 'info_dict': { 'id': '118577', 'ext': 'mp4', - 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"' + 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"', + 'timestamp': 1472230800, + 'upload_date': '20160826', } }, { 'url': 'http://ren.tv/player/118577', @@ -27,18 +33,31 @@ class RENTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) config = self._parse_json(self._search_regex( - r'config\s*=\s*({.+});', webpage, 'config'), video_id) + r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) + title = config['title'] formats = [] - for video in config.get('src', ''): - formats.append({ - 'url': video.get('src', '') - }) + for video in config['src']: + src = video.get('src') + if not src or not isinstance(src, compat_str): + continue + ext = determine_ext(src) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) self._sort_formats(formats) return { 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'duration': int_or_none(config.get('duration')), + 'timestamp': int_or_none(config.get('date')), 'formats': formats, - 'title': config.get('title', ''), - 'thumbnail': config.get('image', '') } From 040c6296bb9da495d37ba134baee996a3a97b64f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Apr 2018 04:55:35 +0700 Subject: [PATCH 23/47] [ccma] Fix video extraction (closes #15931) --- youtube_dl/extractor/ccma.py | 50 +++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index bec0a825a..07f5206c1 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + clean_html, int_or_none, parse_duration, parse_iso8601, - 
clean_html, + parse_resolution, ) @@ -40,34 +42,42 @@ class CCMAIE(InfoExtractor): def _real_extract(self, url): media_type, media_id = re.match(self._VALID_URL, url).groups() - media_data = {} - formats = [] - profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc'] - for i, profile in enumerate(profiles): - md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ + + media = self._download_json( + 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'media': media_type, 'idint': media_id, - 'profile': profile, - }, fatal=False) - if md: - media_data = md - media_url = media_data.get('media', {}).get('url') - if media_url: - formats.append({ - 'format_id': profile, - 'url': media_url, - 'quality': i, - }) + }) + + formats = [] + media_url = media['media']['url'] + if isinstance(media_url, list): + for format_ in media_url: + format_url = format_.get('file') + if not format_url or not isinstance(format_url, compat_str): + continue + label = format_.get('label') + f = parse_resolution(label) + f.update({ + 'url': format_url, + 'format_id': label, + }) + formats.append(f) + else: + formats.append({ + 'url': media_url, + 'vcodec': 'none' if media_type == 'audio' else None, + }) self._sort_formats(formats) - informacio = media_data['informacio'] + informacio = media['informacio'] title = informacio['titol'] durada = informacio.get('durada', {}) duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc')) subtitles = {} - subtitols = media_data.get('subtitols', {}) + subtitols = media.get('subtitols', {}) if subtitols: sub_url = subtitols.get('url') if sub_url: @@ -77,7 +87,7 @@ class CCMAIE(InfoExtractor): }) thumbnails = [] - imatges = media_data.get('imatges', {}) + imatges = media.get('imatges', {}) if imatges: thumbnail_url = imatges.get('url') if thumbnail_url: From 353f0bde78eb4dd9432c092f244bb30b2abd7f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Apr 2018 04:57:22 +0700 Subject: [PATCH 24/47] [cbssports] PEP 8 --- youtube_dl/extractor/cbssports.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index 27a243d08..83b764762 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -32,5 +32,7 @@ class CBSSportsIE(CBSBaseIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id= self._search_regex([r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'], webpage, 'video id') + video_id = self._search_regex( + [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'], + webpage, 'video id') return self._extract_video_info('byId=%s' % video_id, video_id) From 488ff2dd3a193544a9912776d1c1b9d9fffc8fe7 Mon Sep 17 00:00:00 2001 From: 0x9fff00 <0x9fff00+git@protonmail.ch> Date: Sat, 17 Mar 2018 16:14:20 +0100 Subject: [PATCH 25/47] [svt] Add support for TV channel live streams (Closes #15279) --- youtube_dl/extractor/svt.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index b544da414..d01f85422 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -22,6 +22,8 @@ class SVTBaseIE(InfoExtractor): _GEO_COUNTRIES = ['SE'] def _extract_video(self, video_info, video_id): + is_live = dict_get(video_info, 
('live', 'simulcast'), default=False) + m3u8_protocol = 'm3u8' if is_live else 'm3u8_native' formats = [] for vr in video_info['videoReferences']: player_type = vr.get('playerType') or vr.get('format') @@ -30,7 +32,7 @@ class SVTBaseIE(InfoExtractor): if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( vurl, video_id, - ext='mp4', entry_protocol='m3u8_native', + ext='mp4', entry_protocol=m3u8_protocol, m3u8_id=player_type, fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( @@ -90,6 +92,7 @@ class SVTBaseIE(InfoExtractor): 'season_number': season_number, 'episode': episode, 'episode_number': episode_number, + 'is_live': is_live, } @@ -134,7 +137,7 @@ class SVTPlayBaseIE(SVTBaseIE): class SVTPlayIE(SVTPlayBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' - _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P\w+)' _TESTS = [{ 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', @@ -158,6 +161,9 @@ class SVTPlayIE(SVTPlayBaseIE): }, { 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', 'only_matching': True, + }, { + 'url': 'https://www.svtplay.se/kanaler/svt1', + 'only_matching': True, }] def _real_extract(self, url): @@ -183,6 +189,8 @@ class SVTPlayIE(SVTPlayBaseIE): 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], 'thumbnail': thumbnail, }) + if info_dict['is_live']: + info_dict['title'] = self._live_title(info_dict['title']) return info_dict video_id = self._search_regex( @@ -198,6 +206,8 @@ class SVTPlayIE(SVTPlayBaseIE): info_dict['title'] = re.sub( r'\s*\|\s*.+?$', '', info_dict.get('episode') or self._og_search_title(webpage)) + if info_dict['is_live']: + info_dict['title'] = self._live_title(info_dict['title']) return info_dict From 6cdaaf703149f1d6f1d24cfdb5a538ca41d08a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Apr 2018 05:33:08 +0700 Subject: [PATCH 26/47] [svt] Improve (closes #15809) --- youtube_dl/extractor/svt.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index d01f85422..f71eab8b2 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -137,7 +137,7 @@ class SVTPlayBaseIE(SVTBaseIE): class SVTPlayIE(SVTPlayBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' - _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P\w+)' + _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', @@ -179,6 +179,10 @@ class SVTPlayIE(SVTPlayBaseIE): thumbnail = self._og_search_thumbnail(webpage) + def adjust_title(info): + if info['is_live']: + info['title'] = self._live_title(info['title']) + if data: video_info = try_get( data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], @@ -189,8 +193,7 @@ class SVTPlayIE(SVTPlayBaseIE): 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], 'thumbnail': thumbnail, }) - if info_dict['is_live']: - info_dict['title'] = self._live_title(info_dict['title']) + adjust_title(info_dict) return info_dict video_id = self._search_regex( @@ -206,8 
+209,7 @@ class SVTPlayIE(SVTPlayBaseIE): info_dict['title'] = re.sub( r'\s*\|\s*.+?$', '', info_dict.get('episode') or self._og_search_title(webpage)) - if info_dict['is_live']: - info_dict['title'] = self._live_title(info_dict['title']) + adjust_title(info_dict) return info_dict From 3853309fe238bb709b7c5db261724c33b48a8693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Apr 2018 06:07:32 +0700 Subject: [PATCH 27/47] [youtube:feed] Implement lazy playlist extraction (closes #10184) --- youtube_dl/extractor/youtube.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 617be8e96..e9965509c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2699,10 +2699,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): def _real_initialize(self): self._login() - def _real_extract(self, url): - page = self._download_webpage( - 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE) - + def _entries(self, page): # The extraction process is the same as for playlists, but the regex # for the video ids doesn't contain an index ids = [] @@ -2713,12 +2710,15 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): # 'recommended' feed has infinite 'load more' and each new portion spins # the same videos in (sometimes) slightly different order, so we'll check # for unicity and break when portion has no new videos - new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches)) + new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches))) if not new_ids: break ids.extend(new_ids) + for entry in self._ids_to_results(new_ids): + yield entry + mobj = re.search(r'data-uix-load-more-href="/?(?P[^"]+)"', more_widget_html) if not mobj: break @@ -2730,8 +2730,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): content_html = more['content_html'] more_widget_html = more['load_more_widget_html'] + def _real_extract(self, url): + page = self._download_webpage( + 'https://www.youtube.com/feed/%s' % self._FEED_NAME, + self._PLAYLIST_TITLE) return self.playlist_result( - self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE) + self._entries(page), playlist_title=self._PLAYLIST_TITLE) class YoutubeWatchLaterIE(YoutubePlaylistIE): From 70d35d166c1cfb14af20fb6d45ed820b6249f941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Apr 2018 06:08:05 +0700 Subject: [PATCH 28/47] [youtube] Add ability to authenticate with cookies --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e9965509c..e7bd1f18f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -87,7 +87,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): (username, password) = self._get_login_info() # No authentication to be performed if username is None: - if self._LOGIN_REQUIRED: + if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None: raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) return True From 2441c1aab152cd81d53f0a6fca982af9f8c8de10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 Apr 2018 00:16:52 +0700 Subject: [PATCH 29/47] [breakcom] Fix extraction (closes #16254) --- youtube_dl/extractor/breakcom.py | 148 ++++++++++--------------------- 1 file changed, 47 insertions(+), 101 deletions(-) diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 5a87c2661..70d16767f 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -3,15 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .youtube import YoutubeIE from ..compat import compat_str -from ..utils import ( - int_or_none, - parse_age_limit, -) +from ..utils import int_or_none class BreakIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pbreak|screenjunkies)\.com/video/(?P[^/]+?)(?:-(?P\d+))?(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P[^/]+?)(?:-(?P\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', 'info_dict': { @@ -19,125 +17,73 @@ class BreakIE(InfoExtractor): 'ext': 'mp4', 'title': 'When Girls Act Like D-Bags', 'age_limit': 13, + }, + }, { + # youtube embed + 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', + 'info_dict': { + 'id': 'RrrDLdeL2HQ', + 'ext': 'mp4', + 'title': 'Whale Watching Boat Crashing Into San Diego Dock', + 'description': 'md5:afc1b2772f0a8468be51dd80eb021069', + 'upload_date': '20160331', + 'uploader': 'Steve Holden', + 'uploader_id': 'sdholden07', + }, + 'params': { + 'skip_download': True, } - }, { - 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', - 'md5': '5c2b686bec3d43de42bde9ec047536b0', - 'info_dict': { - 'id': '2841915', - 'display_id': 'best-quentin-tarantino-movie', - 'ext': 'mp4', - 'title': 'Best Quentin Tarantino Movie', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3671, - 'age_limit': 13, - 'tags': list, - }, - }, { - 'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight', - 'info_dict': { - 'id': '2348808', - 'display_id': 'honest-trailers-the-dark-knight', - 'ext': 'mp4', - 'title': 'Honest Trailers - The Dark Knight', - 'thumbnail': r're:^https?://.*\.(?:jpg|png)', - 'age_limit': 10, - 'tags': list, - }, - }, { - # requires subscription but worked around - 'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285', - 'info_dict': { - 'id': '3003285', - 'display_id': 'knocking-dead-ep-1-the-show-so-far', - 'ext': 'mp4', - 'title': 'State of The Dead Recap: Knocking Dead Pilot', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3307, - 'age_limit': 13, - 'tags': list, - }, }, { 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', 'only_matching': True, }] - _DEFAULT_BITRATES = (48, 150, 320, 496, 864, 2240, 3264) - def _real_extract(self, url): - site, display_id, video_id = re.match(self._VALID_URL, url).groups() + display_id, video_id = re.match(self._VALID_URL, url).groups() - if not video_id: - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - (r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'), - webpage, 'video id') + webpage = self._download_webpage(url, display_id) - webpage = self._download_webpage( - 'http://www.%s.com/embed/%s' % (site, video_id), - display_id, 'Downloading video embed page') - embed_vars = self._parse_json( + youtube_url = 
YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + + content = self._parse_json( self._search_regex( - r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'), + r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, + 'content'), display_id) - youtube_id = embed_vars.get('youtubeId') - if youtube_id: - return self.url_result(youtube_id, 'Youtube') - - title = embed_vars['contentName'] - formats = [] - bitrates = [] - for f in embed_vars.get('media', []): - if not f.get('uri') or f.get('mediaPurpose') != 'play': + for video in content: + video_url = video.get('url') + if not video_url or not isinstance(video_url, compat_str): continue - bitrate = int_or_none(f.get('bitRate')) - if bitrate: - bitrates.append(bitrate) + bitrate = int_or_none(self._search_regex( + r'(\d+)_kbps', video_url, 'tbr', default=None)) formats.append({ - 'url': f['uri'], + 'url': video_url, 'format_id': 'http-%d' % bitrate if bitrate else 'http', - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), 'tbr': bitrate, - 'format': 'mp4', }) - - if not bitrates: - # When subscriptionLevel > 0, i.e. plus subscription is required - # media list will be empty. However, hds and hls uris are still - # available. We can grab them assuming bitrates to be default. - bitrates = self._DEFAULT_BITRATES - - auth_token = embed_vars.get('AuthToken') - - def construct_manifest_url(base_url, ext): - pieces = [base_url] - pieces.extend([compat_str(b) for b in bitrates]) - pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token)) - return ','.join(pieces) - - if bitrates and auth_token: - hds_url = embed_vars.get('hdsUri') - if hds_url: - formats.extend(self._extract_f4m_formats( - construct_manifest_url(hds_url, 'f4m'), - display_id, f4m_id='hds', fatal=False)) - hls_url = embed_vars.get('hlsUri') - if hls_url: - formats.extend(self._extract_m3u8_formats( - construct_manifest_url(hls_url, 'm3u8'), - display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) + title = self._search_regex( + (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value') + + def get(key, name): + return int_or_none(self._search_regex( + r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, + default=None)) + + age_limit = get('ratings', 'age limit') + video_id = video_id or get('pid', 'video id') or display_id + return { 'id': video_id, 'display_id': display_id, 'title': title, - 'thumbnail': embed_vars.get('thumbUri'), - 'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None, - 'age_limit': parse_age_limit(embed_vars.get('audienceRating')), - 'tags': embed_vars.get('tags', '').split(','), + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': age_limit, 'formats': formats, } From af751350e8651f665333554fa13b335b073fa736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 Apr 2018 02:50:11 +0700 Subject: [PATCH 30/47] [Makefile] Add support for pandoc 2 and disable smart extension (closes #16251) The smart extension rewrites straight quotes as curly quotes, -- as en-dashes and so on, which is unwanted behavior.
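A minimal sketch of how the version probe in the Makefile behaves, assuming a pandoc 2.x install (the version string shown here is made up for illustration):

    $ pandoc -v | head -n1
    pandoc 2.1.2
    $ pandoc -v | head -n1 | cut -d" " -f2 | head -c1
    2
    # -> MARKDOWN=markdown-smart, so the man page is built with
    #    pandoc -s -f markdown-smart -t man youtube-dl.1.temp.md -o youtube-dl.1
    # A pandoc 1.x install prints 1 here and keeps the plain markdown reader.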
--- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fe247810f..4a62f44bc 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,9 @@ PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) +# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 +MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) + install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) install -m 755 youtube-dl $(DESTDIR)$(BINDIR) @@ -82,11 +85,11 @@ supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md README.txt: README.md - pandoc -f markdown -t plain README.md -o README.txt + pandoc -f $(MARKDOWN) -t plain README.md -o README.txt youtube-dl.1: README.md $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md - pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 + pandoc -s -f $(MARKDOWN) -t man youtube-dl.1.temp.md -o youtube-dl.1 rm -f youtube-dl.1.temp.md youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in From 171625469ab1b2a4dc99ed173a10be45e7fc13d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 Apr 2018 03:17:34 +0700 Subject: [PATCH 31/47] [etonline] Remove extractor (closes #16256) Covered by generic extractor --- youtube_dl/extractor/etonline.py | 39 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 40 deletions(-) delete mode 100644 youtube_dl/extractor/etonline.py diff --git a/youtube_dl/extractor/etonline.py b/youtube_dl/extractor/etonline.py deleted file mode 100644 index 17d7cfec6..000000000 --- a/youtube_dl/extractor/etonline.py +++ /dev/null @@ -1,39 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class ETOnlineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/', - 'info_dict': { - 'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale', - 'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6', - 'description': 'md5:8b94484063f463cca709617c79618ccd', - }, - 'playlist_count': 2, - }, { - 'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s' - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id) - for video_id in re.findall( - r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)] - - return self.playlist_result( - entries, playlist_id, - self._og_search_title(webpage, fatal=False), - self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3570fa165..6fb65e4fe 100644 --- a/youtube_dl/extractor/extractors.py +++ 
b/youtube_dl/extractor/extractors.py @@ -326,7 +326,6 @@ from .espn import ( FiveThirtyEightIE, ) from .esri import EsriVideoIE -from .etonline import ETOnlineIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE from .expotv import ExpoTVIE From 99036a1298089068dcf80c0985bfcc3f8c24f281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 Apr 2018 04:03:11 +0700 Subject: [PATCH 32/47] [pornflip] Relax _VALID_URL (closes #16258) --- youtube_dl/extractor/pornflip.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index ee04936e1..025985fbc 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -14,7 +14,7 @@ from ..utils import ( class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})' + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', @@ -40,6 +40,9 @@ class PornFlipIE(InfoExtractor): }, { 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s', 'only_matching': True, + }, { + 'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8', + 'only_matching': True, }] def _real_extract(self, url): From 1cc47c667419e0eadc0a6989256ab7b276852adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Apr 2018 23:49:30 +0700 Subject: [PATCH 33/47] [utils] Fix match_str for boolean meta fields --- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 4 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a1fe6fdb2..253a7fe17 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1072,6 +1072,18 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertFalse(match_str( 'like_count > 100 & dislike_count %s)\s*(?P[a-z_]+) From 0ff51adae6feab7386874eddc0d61dbeaf063bf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 24 Apr 2018 23:53:01 +0700 Subject: [PATCH 34/47] [twitch] Extract is_live according to status (closes #16259) --- youtube_dl/extractor/twitch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f736283e9..4c11fd3c3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -168,6 +168,13 @@ class TwitchItemBaseIE(TwitchBaseIE): return self.playlist_result(entries, info['id'], info['title']) def _extract_info(self, info): + status = info.get('status') + if status == 'recording': + is_live = True + elif status == 'recorded': + is_live = False + else: + is_live = None return { 'id': info['_id'], 'title': info.get('title') or 'Untitled Broadcast', @@ -178,6 +185,7 @@ class TwitchItemBaseIE(TwitchBaseIE): 'uploader_id': info.get('channel', {}).get('name'), 'timestamp': parse_iso8601(info.get('recorded_at')), 'view_count': int_or_none(info.get('views')), + 'is_live': is_live, } def _real_extract(self, url): From 76030543cd5e2214c47aa82f03b3e2cec97e7bc1 Mon Sep 17 00:00:00 2001 From: Alexandre Macabies Date: Tue, 24 Apr 2018 19:49:30 +0200 Subject: [PATCH 35/47] [openload] Recognize IPv6 stream URLs (closes #16137) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index
650f95656..d0bdd60b8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -340,7 +340,10 @@ class OpenloadIE(InfoExtractor): get_element_by_id('streamurj', webpage) or self._search_regex( (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', - r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)'), webpage, + r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', + r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', + r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', + r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, 'stream URL')) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id From 5d0fe6d23e4407bee3caec33955d4cb410bebb5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 00:56:16 +0700 Subject: [PATCH 36/47] Credit @Zopieux for #16250 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 6223212aa..880e0abee 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,3 +236,4 @@ Lei Wang Petr Novák Leonardo Taccari Martin Weinelt +Alexandre Macabies From 95284bc281d8aa3b1d6863ccb536da9d4cf6433c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:01:06 +0700 Subject: [PATCH 37/47] Credit @TingPing for picarto (#15551) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 880e0abee..812051796 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,4 +236,5 @@ Lei Wang Petr Novák Leonardo Taccari Martin Weinelt +TingPing Alexandre Macabies From ecb24f7c081b764dd669cb4b277d8c14e55b2a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:02:28 +0700 Subject: [PATCH 38/47] Credit @f2face for #16115 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 812051796..eaf96d79d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -236,5 +236,6 @@ Lei Wang Petr Novák Leonardo Taccari Martin Weinelt +Surya Oktafendri TingPing Alexandre Macabies From e028d4f506562a1febf76277795305e296823ad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:03:42 +0700 Subject: [PATCH 39/47] [ChangeLog] Actualize [ci skip] --- ChangeLog | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ChangeLog b/ChangeLog index 185fa1753..a731fde29 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +version + +Core +* [utils] Fix match_str for boolean meta fields ++ [Makefile] Add support for pandoc 2 and disable smart extension (#16251) +* [YoutubeDL] Fix typo in media extension compatibility checker (#16215) + +Extractors ++ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246, + #16250) ++ [twitch] Extract is_live according to status (#16259) +* [pornflip] Relax URL regular expression (#16258) +- [etonline] Remove extractor (#16256) +* [breakcom] Fix extraction (#16254) ++ [youtube] Add ability to authenticate with cookies +* [youtube:feed] Implement lazy playlist extraction (#10184) ++ [svt] Add support for TV channel live streams (#15279, #15809) +* [ccma] Fix video extraction (#15931) +* [rentv] Fix extraction (#15227) ++ [nick] Add support for nickjr.nl (#16230) +* [extremetube] Fix metadata extraction ++ [keezmovies] Add support for generic embeds (#16134, #16154) +* [nexx] Extract new azure URLs (#16223) +* [cbssports] Fix extraction (#16217) +* [kaltura] Improve embeds detection (#16201) +* [instagram:user] Fix extraction (#16119) +* [cbs] Skip DRM asset types (#16104) + + version 2018.04.16 Extractors From b5802d69f511481a87d8604fa1577bca8370cab5 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 01:12:40 +0700 Subject: [PATCH 40/47] release 2018.04.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 - youtube_dl/version.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 69f996179..252fa0adf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.25*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.04.16 +[debug] youtube-dl version 2018.04.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a731fde29..4a3df67df 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.04.25 Core * [utils] Fix match_str for boolean meta fields diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 715d16cfe..a110f687b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -257,7 +257,6 @@ - **ESPN** - **ESPNArticle** - **EsriVideo** - - **ETOnline** - **Europa** - **EveryonesMixtape** - **ExpoTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5aefdd0a2..4e3cb39c6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.04.16' +__version__ = '2018.04.25' From d3711b00502d9104a3697aba5d210a25066ca756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Apr 2018 02:14:27 +0700 Subject: [PATCH 41/47] [devscripts/gh-pages/generate-download.py] Use program checksum from versions.json --- devscripts/gh-pages/generate-download.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index fcd7e1dff..a873d32ee 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -1,27 +1,22 @@ #!/usr/bin/env python3 from __future__ import unicode_literals -import hashlib -import urllib.request import json versions_info = 
json.load(open('update/versions.json')) version = versions_info['latest'] -URL = versions_info['versions'][version]['bin'][0] - -data = urllib.request.urlopen(URL).read() +version_dict = versions_info['versions'][version] # Read template page with open('download.html.in', 'r', encoding='utf-8') as tmplf: template = tmplf.read() -sha256sum = hashlib.sha256(data).hexdigest() template = template.replace('@PROGRAM_VERSION@', version) -template = template.replace('@PROGRAM_URL@', URL) -template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) -template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) -template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) -template = template.replace('@TAR_URL@', versions_info['versions'][version]['tar'][0]) -template = template.replace('@TAR_SHA256SUM@', versions_info['versions'][version]['tar'][1]) +template = template.replace('@PROGRAM_URL@', version_dict['bin'][0]) +template = template.replace('@PROGRAM_SHA256SUM@', version_dict['bin'][1]) +template = template.replace('@EXE_URL@', version_dict['exe'][0]) +template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1]) +template = template.replace('@TAR_URL@', version_dict['tar'][0]) +template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1]) with open('download.html', 'w', encoding='utf-8') as dlf: dlf.write(template) From c84eae4f66be8a22c14b852bdb01773bb3807239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Apr 2018 03:45:52 +0700 Subject: [PATCH 42/47] [funk:channel] Improve extraction (closes #16285) --- youtube_dl/extractor/funk.py | 51 ++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py index faea6576f..0ff058619 100644 --- a/youtube_dl/extractor/funk.py +++ b/youtube_dl/extractor/funk.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from .nexx import NexxIE -from ..utils import int_or_none +from ..utils import ( + int_or_none, + try_get, +) class FunkBaseIE(InfoExtractor): @@ -77,6 +80,20 @@ class FunkChannelIE(FunkBaseIE): 'params': { 'skip_download': True, }, + }, { + # only available via byIdList API + 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu', + 'info_dict': { + 'id': '205067', + 'ext': 'mp4', + 'title': 'Martin Sonneborn erklärt die EU', + 'description': 'md5:050f74626e4ed87edf4626d2024210c0', + 'timestamp': 1494424042, + 'upload_date': '20170510', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/', 'only_matching': True, @@ -87,16 +104,28 @@ class FunkChannelIE(FunkBaseIE): channel_id = mobj.group('id') alias = mobj.group('alias') - results = self._download_json( - 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id, - headers={ - 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc', - 'Referer': url, - }, query={ - 'channelId': channel_id, - 'size': 100, - })['result'] + headers = { + 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc', + 'Referer': url, + } - video = next(r for r in 
results if r.get('alias') == alias) + video = None + + by_id_list = self._download_json( + 'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id, + headers=headers, query={ + 'ids': alias, + }, fatal=False) + if by_id_list: + video = try_get(by_id_list, lambda x: x['result'][0], dict) + + if not video: + results = self._download_json( + 'https://www.funk.net/api/v3.0/content/videos/filter', channel_id, + headers=headers, query={ + 'channelId': channel_id, + 'size': 100, + })['result'] + video = next(r for r in results if r.get('alias') == alias) return self._make_url_result(video) From 0fe7783eced5c62dbd95780c2150fd1080bd3927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 01:59:15 +0700 Subject: [PATCH 43/47] [extractor/common] Add _download_json_handle --- youtube_dl/extractor/common.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 59b9d3739..e0c3c8eb0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -682,18 +682,30 @@ class InfoExtractor(object): else: self.report_warning(errmsg + str(ve)) - def _download_json(self, url_or_request, video_id, - note='Downloading JSON metadata', - errnote='Unable to download JSON metadata', - transform_source=None, - fatal=True, encoding=None, data=None, headers={}, query={}): - json_string = self._download_webpage( + def _download_json_handle( + self, url_or_request, video_id, note='Downloading JSON metadata', + errnote='Unable to download JSON metadata', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + """Return a tuple (JSON object, URL handle)""" + res = self._download_webpage_handle( url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) - if (not fatal) and json_string is False: - return None + if res is False: + return res + json_string, urlh = res return self._parse_json( - json_string, video_id, transform_source=transform_source, fatal=fatal) + json_string, video_id, transform_source=transform_source, + fatal=fatal), urlh + + def _download_json( + self, url_or_request, video_id, note='Downloading JSON metadata', + errnote='Unable to download JSON metadata', transform_source=None, + fatal=True, encoding=None, data=None, headers={}, query={}): + res = self._download_json_handle( + url_or_request, video_id, note=note, errnote=errnote, + transform_source=transform_source, fatal=fatal, encoding=encoding, + data=data, headers=headers, query=query) + return res if res is False else res[0] def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): if transform_source: From 6cc622327ff8289f94894f3695ed31014c61cf8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 02:47:17 +0700 Subject: [PATCH 44/47] [utils] Introduce merge_dicts --- test/test_utils.py | 12 ++++++++++++ youtube_dl/extractor/generic.py | 16 +--------------- youtube_dl/utils.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 253a7fe17..14503ab53 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -42,6 +42,7 @@ from youtube_dl.utils import ( is_html, js_to_json, limit_length, + merge_dicts, mimetype2ext, month_by_name, multipart_encode, @@ -669,6 +670,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(dict_get(d, ('b', 'c', key, )), 
None) self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + def test_merge_dicts(self): + self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) + self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1}) + self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + def test_encode_compat_str(self): self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index af1322e00..d48914495 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -23,6 +23,7 @@ from ..utils import ( is_html, js_to_json, KNOWN_EXTENSIONS, + merge_dicts, mimetype2ext, orderedSet, sanitized_Request, @@ -3002,21 +3003,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( sharevideos_urls, video_id, video_title) - def merge_dicts(dict1, dict2): - merged = {} - for k, v in dict1.items(): - if v is not None: - merged[k] = v - for k, v in dict2.items(): - if v is None: - continue - if (k not in merged or - (isinstance(v, compat_str) and v and - isinstance(merged[k], compat_str) and - not merged[k])): - merged[k] = v - return merged - # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 574284e94..b460393bf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2225,6 +2225,20 @@ def try_get(src, getter, expected_type=None): return v +def merge_dicts(*dicts): + merged = {} + for a_dict in dicts: + for k, v in a_dict.items(): + if v is None: + continue + if (k not in merged or + (isinstance(v, compat_str) and v and + isinstance(merged[k], compat_str) and + not merged[k])): + merged[k] = v + return merged + + def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) From e7e4a6e0f9166cee82c165ca69a6a3c94ddc5f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 02:48:03 +0700 Subject: [PATCH 45/47] [extractor/common] Extract interaction statistic --- youtube_dl/extractor/common.py | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e0c3c8eb0..a9939b0fd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1020,6 +1020,40 @@ class InfoExtractor(object): if isinstance(json_ld, dict): json_ld = [json_ld] + INTERACTION_TYPE_MAP = { + 'CommentAction': 'comment', + 'AgreeAction': 'like', + 'DisagreeAction': 'dislike', + 'LikeAction': 'like', + 'DislikeAction': 'dislike', + 'ListenAction': 'view', + 'WatchAction': 'view', + 'ViewAction': 'view', + } + + def extract_interaction_statistic(e): + interaction_statistic = e.get('interactionStatistic') + if not isinstance(interaction_statistic, list): + return + for is_e in interaction_statistic: + if not 
isinstance(is_e, dict): + continue + if is_e.get('@type') != 'InteractionCounter': + continue + interaction_type = is_e.get('interactionType') + if not isinstance(interaction_type, compat_str): + continue + interaction_count = int_or_none(is_e.get('userInteractionCount')) + if interaction_count is None: + continue + count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1]) + if not count_kind: + continue + count_key = '%s_count' % count_kind + if info.get(count_key) is not None: + continue + info[count_key] = interaction_count + def extract_video_object(e): assert e['@type'] == 'VideoObject' info.update({ @@ -1035,6 +1069,7 @@ class InfoExtractor(object): 'height': int_or_none(e.get('height')), 'view_count': int_or_none(e.get('interactionCount')), }) + extract_interaction_statistic(e) for e in json_ld: if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')): From ae1c585cee3eb183cddf7c30a09b75d887307dee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 02:48:20 +0700 Subject: [PATCH 46/47] [vimeo] Extract JSON LD (closes #16295) --- youtube_dl/extractor/vimeo.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 08257147e..a026526b2 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,6 +16,7 @@ from ..utils import ( ExtractorError, InAdvancePagedList, int_or_none, + merge_dicts, NO_DEFAULT, RegexNotFoundError, sanitized_Request, @@ -639,16 +640,18 @@ class VimeoIE(VimeoBaseInfoExtractor): 'preference': 1, }) - info_dict = self._parse_config(config, video_id) - formats.extend(info_dict['formats']) + info_dict_config = self._parse_config(config, video_id) + formats.extend(info_dict_config['formats']) self._vimeo_sort_formats(formats) + json_ld = self._search_json_ld(webpage, video_id, default={}) + if not cc_license: cc_license = self._search_regex( r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1', webpage, 'license', default=None, group='license') - info_dict.update({ + info_dict = { 'id': video_id, 'formats': formats, 'timestamp': unified_timestamp(timestamp), @@ -658,7 +661,9 @@ class VimeoIE(VimeoBaseInfoExtractor): 'like_count': like_count, 'comment_count': comment_count, 'license': cc_license, - }) + } + + info_dict = merge_dicts(info_dict, info_dict_config, json_ld) return info_dict From 7dd6ab4a47b08beafe45befa29c44df2db00547e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Apr 2018 04:51:39 +0700 Subject: [PATCH 47/47] [imdb] Extract all formats (closes #16249) --- youtube_dl/extractor/imdb.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 3ff672a89..425421968 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -3,7 +3,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + determine_ext, mimetype2ext, qualities, remove_end, @@ -73,19 +75,25 @@ class ImdbIE(InfoExtractor): video_info_list = format_info.get('videoInfoList') if not video_info_list or not isinstance(video_info_list, list): continue - video_info = video_info_list[0] - if not video_info or not isinstance(video_info, dict): - continue - video_url = video_info.get('videoUrl') - if not video_url: - continue - format_id =
format_info.get('ffname') - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'ext': mimetype2ext(video_info.get('videoMimeType')), - 'quality': quality(format_id), - }) + for video_info in video_info_list: + if not video_info or not isinstance(video_info, dict): + continue + video_url = video_info.get('videoUrl') + if not video_url or not isinstance(video_url, compat_str): + continue + if (video_info.get('videoMimeType') == 'application/x-mpegURL' or + determine_ext(video_url) == 'm3u8'): + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + format_id = format_info.get('ffname') + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'ext': mimetype2ext(video_info.get('videoMimeType')), + 'quality': quality(format_id), + }) self._sort_formats(formats) return {