From 9b5aead6aa8ad82a5eecd2bc26c0e94399e92ca7 Mon Sep 17 00:00:00 2001 From: Timmy Date: Sat, 14 Apr 2018 17:04:42 +0200 Subject: [PATCH 01/11] [vine:user] Fix extraction (closes #15514) --- youtube_dl/extractor/vine.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 46950d3a1..08ddffa66 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import itertools from .common import InfoExtractor from ..utils import ( @@ -116,14 +115,14 @@ class VineUserIE(InfoExtractor): _VINE_BASE_URL = 'https://vine.co/' _TESTS = [ { - 'url': 'https://vine.co/Visa', + 'url': 'https://vine.co/itsruthb', 'info_dict': { - 'id': 'Visa', + 'id': 'itsruthb', }, - 'playlist_mincount': 46, + 'playlist_mincount': 611, }, { - 'url': 'https://vine.co/u/941705360593584128', + 'url': 'https://vine.co/u/942914934646415360', 'only_matching': True, }, ] @@ -139,16 +138,10 @@ class VineUserIE(InfoExtractor): profile_url, user, note='Downloading user profile data') user_id = profile_data['data']['userId'] - timeline_data = [] - for pagenum in itertools.count(1): - timeline_url = '%sapi/timelines/users/%s?page=%s&size=100' % ( - self._VINE_BASE_URL, user_id, pagenum) - timeline_page = self._download_json( - timeline_url, user, note='Downloading page %d' % pagenum) - timeline_data.extend(timeline_page['data']['records']) - if timeline_page['data']['nextPage'] is None: - break - + user_archive = self._download_json( + 'https://archive.vine.co/profiles/%s.json' % user_id, user_id) + posts = user_archive['posts'] entries = [ - self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data] + self.url_result('https://vine.co/v/%s' % post_id, 'Vine') + for post_id in posts] return self.playlist_result(entries, user) From 8e41c9ad01b6deda96c29f685c4d8861b8759ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Apr 2018 22:43:25 +0700 Subject: [PATCH 02/11] [vine:user] Improve extraction (closes #16190) --- youtube_dl/extractor/vine.py | 45 +++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 08ddffa66..80b896b56 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, @@ -111,21 +112,24 @@ class VineIE(InfoExtractor): class VineUserIE(InfoExtractor): IE_NAME = 'vine:user' - _VALID_URL = r'(?:https?://)?vine\.co/(?Pu/)?(?P[^/]+)/?(\?.*)?$' + _VALID_URL = r'https?://vine\.co/(?Pu/)?(?P[^/]+)' _VINE_BASE_URL = 'https://vine.co/' - _TESTS = [ - { - 'url': 'https://vine.co/itsruthb', - 'info_dict': { - 'id': 'itsruthb', - }, - 'playlist_mincount': 611, + _TESTS = [{ + 'url': 'https://vine.co/itsruthb', + 'info_dict': { + 'id': 'itsruthb', + 'title': 'Ruth B', + 'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland', }, - { - 'url': 'https://vine.co/u/942914934646415360', - 'only_matching': True, - }, - ] + 'playlist_mincount': 611, + }, { + 'url': 'https://vine.co/u/942914934646415360', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -137,11 +141,14 @@ class VineUserIE(InfoExtractor): profile_data = self._download_json( profile_url, user, note='Downloading user profile data') - user_id = profile_data['data']['userId'] - user_archive = self._download_json( + data = profile_data['data'] + user_id = data.get('userId') or data['userIdStr'] + profile = self._download_json( 'https://archive.vine.co/profiles/%s.json' % user_id, user_id) - posts = user_archive['posts'] entries = [ - self.url_result('https://vine.co/v/%s' % post_id, 'Vine') - for post_id in posts] - return self.playlist_result(entries, user) + self.url_result( + 'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id) + for post_id in profile['posts'] + if post_id and isinstance(post_id, compat_str)] + return self.playlist_result( + entries, user, profile.get('username'), profile.get('description')) From d6166a7602f5b78a4bb552ba0f4b176cbc0a4a03 Mon Sep 17 00:00:00 2001 From: Patrick Griffis Date: Tue, 21 Mar 2017 00:49:31 +0200 Subject: [PATCH 03/11] [picarto] Add extractor --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/picarto.py | 87 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100755 youtube_dl/extractor/picarto.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c9f60114d..d83e93dec 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -815,6 +815,10 @@ from .periscope import ( from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .picarto import ( + PicartoVodIE, + PicartoIE, +) from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py new file mode 100755 index 000000000..1d6f714ed --- /dev/null +++ b/youtube_dl/extractor/picarto.py @@ -0,0 +1,87 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError, js_to_json, urlencode_postdata + + +class PicartoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)[^/]*$' + _TEST = { + 'url': 'https://picarto.tv/Setz', + 'info_dict': { + 'id': 'Setz', + 'ext': 'mp4', + 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'timestamp': int, + 'is_live': True + }, + 'params': { + 'skip_download': True + } + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + stream_page = self._download_webpage(url, channel_id) + + if 'This channel does not exist.' in stream_page: + raise ExtractorError('Channel does not exist', expected=True) + + player_settings_js = self._html_search_regex( + r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings') + player_settings = self._parse_json(player_settings_js, channel_id, + transform_source=js_to_json) + if not player_settings.get('online'): + raise ExtractorError('Stream is offline', expected=True) + + cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id, + data=urlencode_postdata({'loadbalancinginfo': channel_id}), + note='Fetching load balancer info') + edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0] + + formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id), + channel_id, 'mp4') + formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)}) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'formats': formats, + 'ext': 'mp4', + 'title': self._live_title(channel_id), + 'is_live': True, + 'thumbnail': player_settings.get('vodThumb'), + 'age_limit': 18 if player_settings.get('mature') else None, + } + + +class PicartoVodIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P[a-zA-Z0-9_\-\.]+).flv' + _TEST = { + 'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv', + 'md5': '80765b67813053ff31d4df2bd5e900ce', + 'info_dict': { + 'id': 'Carrot_2018.01.11.07.55.12', + 'ext': 'mp4', + 'title': 'Carrot_2018.01.11.07.55.12', + 'thumbnail': r're:^https?://.*\.jpg$' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)', + webpage, video_id) + vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json) + + return { + 'id': video_id, + 'title': video_id, + 'ext': 'mp4', + 'protocol': 'm3u8', + 'url': vod_info['vod'], + 'thumbnail': vod_info.get('vodThumb'), + } From a42839e548d81ae20e5164ae690075d2c423477e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 00:31:25 +0700 Subject: [PATCH 04/11] [picarto] Improve extraction (closes #6205, closes #12514, closes #15276, closes #15551) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/picarto.py | 152 ++++++++++++++++++++++------- 2 files changed, 116 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d83e93dec..3570fa165 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -816,8 +816,8 @@ from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .picarto import ( - PicartoVodIE, PicartoIE, + PicartoVodIE, ) from .piksel import PikselIE from .pinkbike import PinkbikeIE diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py index 1d6f714ed..2366dfb34 100755 --- a/youtube_dl/extractor/picarto.py +++ b/youtube_dl/extractor/picarto.py @@ -1,12 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals +import time + from .common import InfoExtractor -from ..utils import ExtractorError, js_to_json, urlencode_postdata +from ..compat import compat_str +from ..utils import ( + ExtractorError, + js_to_json, + try_get, + update_url_query, + urlencode_postdata, +) class PicartoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)[^/]*$' + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)' _TEST = { 'url': 'https://picarto.tv/Setz', 'info_dict': { @@ -16,72 +25,141 @@ class PicartoIE(InfoExtractor): 'timestamp': int, 'is_live': True }, - 'params': { - 'skip_download': True - } + 'skip': 'Stream is offline', } + @classmethod + def suitable(cls, url): + return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) + def _real_extract(self, url): channel_id = self._match_id(url) stream_page = self._download_webpage(url, channel_id) - if 'This channel does not exist.' in stream_page: - raise ExtractorError('Channel does not exist', expected=True) + if '>This channel does not exist' in stream_page: + raise ExtractorError( + 'Channel %s does not exist' % channel_id, expected=True) - player_settings_js = self._html_search_regex( - r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings') - player_settings = self._parse_json(player_settings_js, channel_id, - transform_source=js_to_json) - if not player_settings.get('online'): + player = self._parse_json( + self._search_regex( + r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page, + 'player settings'), + channel_id, transform_source=js_to_json) + + if player.get('online') is False: raise ExtractorError('Stream is offline', expected=True) - cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id, + cdn_data = self._download_json( + 'https://picarto.tv/process/channel', channel_id, data=urlencode_postdata({'loadbalancinginfo': channel_id}), - note='Fetching load balancer info') - edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0] + note='Downloading load balancing info') - formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id), - channel_id, 'mp4') - formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)}) + def get_event(key): + return try_get(player, lambda x: x['event'][key], compat_str) or '' + + params = { + 'token': player.get('token') or '', + 'ticket': get_event('ticket'), + 'con': int(time.time() * 1000), + 'type': get_event('ticket'), + 'scope': get_event('scope'), + } + + prefered_edge = cdn_data.get('preferedEdge') + default_tech = player.get('defaultTech') + + formats = [] + + for edge in cdn_data['edges']: + edge_ep = edge.get('ep') + if not edge_ep or not isinstance(edge_ep, compat_str): + continue + edge_id = edge.get('id') + for tech in cdn_data['techs']: + tech_label = tech.get('label') + tech_type = tech.get('type') + preference = 0 + if edge_id == prefered_edge: + preference += 1 + if tech_type == default_tech: + preference += 1 + format_id = [] + if edge_id: + format_id.append(edge_id) + if tech_type == 'application/x-mpegurl' or tech_label == 'HLS': + format_id.append('hls') + formats.extend(self._extract_m3u8_formats( + update_url_query( + 'https://%s/hls/%s/index.m3u8' + % (edge_ep, channel_id), params), + channel_id, 'mp4', preference=preference, + m3u8_id='-'.join(format_id), fatal=False)) + continue + elif tech_type == 'video/mp4' or tech_label == 'MP4': + format_id.append('mp4') + formats.append({ + 'url': update_url_query( + 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id), + params), + 'format_id': '-'.join(format_id), + 'preference': preference, + }) + else: + # rtmp format does not seem to work + continue self._sort_formats(formats) + mature = player.get('mature') + if mature is None: + age_limit = None + else: + age_limit = 18 if mature is True else 0 + return { 'id': channel_id, - 'formats': formats, - 'ext': 'mp4', 'title': self._live_title(channel_id), 'is_live': True, - 'thumbnail': player_settings.get('vodThumb'), - 'age_limit': 18 if player_settings.get('mature') else None, + 'thumbnail': player.get('vodThumb'), + 'age_limit': age_limit, + 'formats': formats, } class PicartoVodIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P[a-zA-Z0-9_\-\.]+).flv' - _TEST = { - 'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv', - 'md5': '80765b67813053ff31d4df2bd5e900ce', + _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv', + 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca', 'info_dict': { - 'id': 'Carrot_2018.01.11.07.55.12', + 'id': 'ArtofZod_2017.12.12.00.13.23.flv', 'ext': 'mp4', - 'title': 'Carrot_2018.01.11.07.55.12', - 'thumbnail': r're:^https?://.*\.jpg$' - } - } + 'title': 'ArtofZod_2017.12.12.00.13.23.flv', + 'thumbnail': r're:^https?://.*\.jpg' + }, + }, { + 'url': 'https://picarto.tv/videopopout/Plague', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)', - webpage, video_id) - vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json) + vod_info = self._parse_json( + self._search_regex( + r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage, + video_id), + video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats( + vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) return { 'id': video_id, 'title': video_id, - 'ext': 'mp4', - 'protocol': 'm3u8', - 'url': vod_info['vod'], 'thumbnail': vod_info.get('vodThumb'), + 'formats': formats, } From c07cb68e7974a2ecd94f4101e6f094414df16e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 00:54:21 +0700 Subject: [PATCH 05/11] [smotri:broadcast] Fix extraction (closes #16180) --- youtube_dl/extractor/smotri.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 370fa8879..45995f30f 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -310,6 +310,7 @@ class SmotriBroadcastIE(InfoExtractor): IE_DESC = 'Smotri.com broadcasts' IE_NAME = 'smotri:broadcast' _VALID_URL = r'https?://(?:www\.)?(?Psmotri\.com/live/(?P[^/]+))/?.*' + _NETRC_MACHINE = 'smotri' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -352,17 +353,18 @@ class SmotriBroadcastIE(InfoExtractor): adult_content = False ticket = self._html_search_regex( - r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)", - broadcast_page, 'broadcast ticket') + (r'data-user-file=(["\'])(?P(?!\1).+)\1', + r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P[^']+)'\)"), + broadcast_page, 'broadcast ticket', group='ticket') - url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket + broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket broadcast_password = self._downloader.params.get('videopassword') if broadcast_password: - url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() + broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() broadcast_json_page = self._download_webpage( - url, broadcast_id, 'Downloading broadcast JSON') + broadcast_url, broadcast_id, 'Downloading broadcast JSON') try: broadcast_json = json.loads(broadcast_json_page) From 0e6ccb3905cb86c53a91af4c9119e2fd102019d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 00:56:05 +0700 Subject: [PATCH 06/11] [ChangeLog] Actualize [ci skip] --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4385c4091..12bda4951 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +version + +Extractors +* [smotri:broadcast] Fix extraction (#16180) ++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551) +* [vine:user] Fix extraction (#15514, #16190) +* [pornhub] Relax URL regular expression (#16165) +* [cbc:watch] Re-acquire device token when expired (#16160) ++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157) ++ [instagram:user] Add request signing (#16119) ++ [twitch] Add support for mobile URLs (#16146) + + version 2018.04.09 Core From bdf7ba6f3a626b4c873257091d0771e54bd02dfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 01:07:21 +0700 Subject: [PATCH 07/11] Set chmod 644 for all extractors --- youtube_dl/extractor/americastestkitchen.py | 0 youtube_dl/extractor/cda.py | 0 youtube_dl/extractor/joj.py | 0 youtube_dl/extractor/picarto.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 youtube_dl/extractor/americastestkitchen.py mode change 100755 => 100644 youtube_dl/extractor/cda.py mode change 100755 => 100644 youtube_dl/extractor/joj.py mode change 100755 => 100644 youtube_dl/extractor/picarto.py diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py old mode 100755 new mode 100644 From 3c92fd1cd5b5ced11f03ebe64104457c21cd69ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Apr 2018 01:09:18 +0700 Subject: [PATCH 08/11] release 2018.04.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ed622afd1..69f996179 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.04.09 +[debug] youtube-dl version 2018.04.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 12bda4951..185fa1753 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2018.04.16 Extractors * [smotri:broadcast] Fix extraction (#16180) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1c13199d4..715d16cfe 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -628,6 +628,8 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** + - **Picarto** + - **PicartoVod** - **Piksel** - **Pinkbike** - **Pladform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 307d6041a..5aefdd0a2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2018.04.09' +__version__ = '2018.04.16' From 522d6b5c961f584055463f8c69de864ec075083b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 16 Apr 2018 07:48:36 +0100 Subject: [PATCH 09/11] [cbs] skip DRM asset types(fixes #16104) --- youtube_dl/extractor/cbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index f425562ab..1799d63ea 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -65,7 +65,7 @@ class CBSIE(CBSBaseIE): last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') - if not asset_type or asset_type in asset_types: + if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'): continue asset_types.append(asset_type) query = { From 238d42cf5d4b1a95ba42bf56dcb1bf559ac11c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Apr 2018 22:37:50 +0700 Subject: [PATCH 10/11] [instagram:user] Fix extraction (closes #16119) --- youtube_dl/extractor/instagram.py | 49 ++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8da1d5f2f..5cea37d92 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -6,11 +6,16 @@ import json import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_HTTPError, +) from ..utils import ( + ExtractorError, get_element_by_attribute, int_or_none, lowercase_escape, + std_headers, try_get, ) @@ -239,6 +244,8 @@ class InstagramUserIE(InfoExtractor): } } + _gis_tmpl = None + def _entries(self, data): def get_count(suffix): return int_or_none(try_get( @@ -257,16 +264,36 @@ class InstagramUserIE(InfoExtractor): 'first': 100, 'after': cursor, }) - s = '%s:%s:%s' % (rhx_gis, csrf_token, variables) - media = self._download_json( - 'https://www.instagram.com/graphql/query/', uploader_id, - 'Downloading JSON page %d' % page_num, headers={ - 'X-Requested-With': 'XMLHttpRequest', - 'X-Instagram-GIS': hashlib.md5(s.encode('utf-8')).hexdigest(), - }, query={ - 'query_hash': '472f257a40c653c64c666ce877d59d2b', - 'variables': variables, - })['data']['user']['edge_owner_to_timeline_media'] + + if self._gis_tmpl: + gis_tmpls = [self._gis_tmpl] + else: + gis_tmpls = [ + '%s' % rhx_gis, + '', + '%s:%s' % (rhx_gis, csrf_token), + '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']), + ] + + for gis_tmpl in gis_tmpls: + try: + media = self._download_json( + 'https://www.instagram.com/graphql/query/', uploader_id, + 'Downloading JSON page %d' % page_num, headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'X-Instagram-GIS': hashlib.md5( + ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(), + }, query={ + 'query_hash': '42323d64886122307be10013ad2dcc44', + 'variables': variables, + })['data']['user']['edge_owner_to_timeline_media'] + self._gis_tmpl = gis_tmpl + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if gis_tmpl != gis_tmpls[-1]: + continue + raise edges = media.get('edges') if not edges or not isinstance(edges, list): From 518d5ba5191e3cc26c81e346ba5117e94db51469 Mon Sep 17 00:00:00 2001 From: Dan Salmon Date: Tue, 17 Apr 2018 12:10:02 -0500 Subject: [PATCH 11/11] Fix some tests --- test/test_subtitles.py | 4 ++-- test/test_youtube_lists.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 1b8de822a..7d57a628e 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -232,7 +232,7 @@ class TestNPOSubtitles(BaseTestSubtitles): class TestMTVSubtitles(BaseTestSubtitles): - url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' + url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' IE = ComedyCentralIE def getInfoDict(self): @@ -243,7 +243,7 @@ class TestMTVSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['en'])) - self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65') + self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961') class TestNRKSubtitles(BaseTestSubtitles): diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 7a33dbf88..c4f0abbea 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -61,7 +61,7 @@ class TestYoutubeLists(unittest.TestCase): dl = FakeYDL() dl.params['extract_flat'] = True ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv') self.assertIsPlaylist(result) for entry in result['entries']: self.assertTrue(entry.get('title'))