From 6e71bbf4abc729cae3b0e428c3bb321690c9e485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Nov 2017 16:12:56 +0700 Subject: [PATCH 01/41] [hotstar] Bypass geo restriction (closes #14672) --- youtube_dl/extractor/hotstar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 3a7a66a34..9be958be6 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -11,6 +11,7 @@ from ..utils import ( class HotStarIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P\d{10})' + _GEO_COUNTRIES = ['IN'] _TESTS = [{ 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', 'info_dict': { From 477c97f86b5451f384a84a7a8d8237cfd1bec1d2 Mon Sep 17 00:00:00 2001 From: Alpesh Valia Date: Thu, 16 Mar 2017 22:00:11 +0530 Subject: [PATCH 02/41] [hotstar:playlist] Add extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/hotstar.py | 58 +++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 92f7e9027..d084707ee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -432,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE -from .hotstar import HotStarIE +from .hotstar import ( + HotStarIE, + HotStarPlaylistIE, +) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .hrti import ( diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 9be958be6..8d8a80a82 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -7,6 +7,7 @@ from ..utils import ( determine_ext, int_or_none, ) +import re class HotStarIE(InfoExtractor): @@ -17,7 +18,7 @@ class HotStarIE(InfoExtractor): 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB - English', + 'title': 'On Air With AIB', 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', 'timestamp': 1447227000, 'upload_date': '20151111', @@ -100,3 +101,58 @@ class HotStarIE(InfoExtractor): 'episode_number': int_or_none(video_data.get('episodeNumber')), 'series': video_data.get('contentTitle'), } + + +class HotStarPlaylistIE(InfoExtractor): + IE_NAME = 'hotstar:playlist' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P.+)/(?P\d+)/episodes/(?P\d{1,})' + + _TESTS = [{ + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'info_dict': { + 'id': '10856', + 'title': 'pow-bandi-yuddh-ke', + }, + 'playlist_mincount': 0, + }, { + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'only_matching': True, + }] + + def _extract_episode_info(self, series_id, playlist_title, video): + + picture_url = video.get('urlPictures') + thumbnail = '' + if picture_url: + thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % (picture_url[-2:], picture_url, picture_url) + + episode_title = video.get('episodeTitle', '') + episode_title = episode_title.lower().replace(' ', '-') + url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) + + info_dict = { + 'id': video.get('contentId'), + 'title': video.get('episodeTitle'), + 'description': video.get('longDescription'), + 'thumbnail': thumbnail, + 'url': url, 
+ '_type': 'url', + } + return info_dict + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + series_id = mobj.group('series_id') + playlist_id = mobj.group('playlist_id') + playlist_title = mobj.group('playlist_title') + + collection = self._download_json( + "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, + playlist_id + ) + + videos = collection.get('resultObj', {}).get('response', {}).get('docs', []) + entries = [ + self._extract_episode_info(series_id, playlist_title, video) + for video in videos if video.get('contentId')] + return self.playlist_result(entries, playlist_id, playlist_title) From 909191de9154bf289b333cfe01b8e88e3ac1fefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Nov 2017 19:14:48 +0700 Subject: [PATCH 03/41] [hotstar:playlist] Fix issues and improve (closes #12465) --- youtube_dl/extractor/hotstar.py | 128 ++++++++++++++++---------------- 1 file changed, 66 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 8d8a80a82..d28af36ec 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -1,18 +1,41 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, - int_or_none, -) import re +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, +) -class HotStarIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P\d{10})' + +class HotStarBaseIE(InfoExtractor): _GEO_COUNTRIES = ['IN'] + + def _download_json(self, *args, **kwargs): + response = super(HotStarBaseIE, self)._download_json(*args, **kwargs) + if response['resultCode'] != 'OK': + if kwargs.get('fatal'): + raise ExtractorError( + response['errorDescription'], expected=True) + return None + return response['resultObj'] + + def _download_content_info(self, content_id): + return self._download_json( + 'https://account.hotstar.com/AVS/besc', content_id, query={ + 'action': 'GetAggregatedContentDetails', + 'appVersion': '5.0.40', + 'channel': 'PCTV', + 'contentId': content_id, + })['contentInfo'][0] + + +class HotStarIE(HotStarBaseIE): + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P\d{10})' _TESTS = [{ 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', 'info_dict': { @@ -36,23 +59,11 @@ class HotStarIE(InfoExtractor): 'only_matching': True, }] - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None): - json_data = super(HotStarIE, self)._download_json( - url_or_request, video_id, note, fatal=fatal, query=query) - if json_data['resultCode'] != 'OK': - if fatal: - raise ExtractorError(json_data['errorDescription']) - return None - return json_data['resultObj'] - def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - 'http://account.hotstar.com/AVS/besc', video_id, query={ - 'action': 'GetAggregatedContentDetails', - 'channel': 'PCTV', - 'contentId': video_id, - })['contentInfo'][0] + + video_data = self._download_content_info(video_id) + title = video_data['episodeTitle'] if video_data.get('encrypted') == 'Y': @@ -103,56 +114,49 @@ class HotStarIE(InfoExtractor): } -class 
HotStarPlaylistIE(InfoExtractor): +class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P.+)/(?P\d+)/episodes/(?P\d{1,})' - + _VALID_URL = r'(?Phttps?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P\d+))/(?P[^/]+)/(?P\d+)' _TESTS = [{ - 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993', 'info_dict': { - 'id': '10856', - 'title': 'pow-bandi-yuddh-ke', + 'id': '14812', }, - 'playlist_mincount': 0, + 'playlist_mincount': 75, }, { - 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998', 'only_matching': True, }] - - def _extract_episode_info(self, series_id, playlist_title, video): - - picture_url = video.get('urlPictures') - thumbnail = '' - if picture_url: - thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % (picture_url[-2:], picture_url, picture_url) - - episode_title = video.get('episodeTitle', '') - episode_title = episode_title.lower().replace(' ', '-') - url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) - - info_dict = { - 'id': video.get('contentId'), - 'title': video.get('episodeTitle'), - 'description': video.get('longDescription'), - 'thumbnail': thumbnail, - 'url': url, - '_type': 'url', - } - return info_dict + _ITEM_TYPES = { + 'episodes': 'EPISODE', + 'popular-clips': 'CLIPS', + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - series_id = mobj.group('series_id') - playlist_id = mobj.group('playlist_id') - playlist_title = mobj.group('playlist_title') + base_url = mobj.group('url') + content_id = mobj.group('content_id') + playlist_type = mobj.group('type') + + content_info = self._download_content_info(content_id) + playlist_id = compat_str(content_info['categoryId']) collection = self._download_json( - "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, - playlist_id - ) + 'https://search.hotstar.com/AVS/besc', playlist_id, query={ + 'action': 'SearchContents', + 'appVersion': '5.0.40', + 'channel': 'PCTV', + 'moreFilters': 'series:%s;' % playlist_id, + 'query': '*', + 'searchOrder': 'last_broadcast_date desc,year desc,title asc', + 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'), + }) - videos = collection.get('resultObj', {}).get('response', {}).get('docs', []) entries = [ - self._extract_episode_info(series_id, playlist_title, video) - for video in videos if video.get('contentId')] - return self.playlist_result(entries, playlist_id, playlist_title) + self.url_result( + '%s/_/%s' % (base_url, video['contentId']), + ie=HotStarIE.ie_key(), video_id=video['contentId']) + for video in collection['response']['docs'] + if video.get('contentId')] + + return self.playlist_result(entries, playlist_id) From e0998333fac2238eff8880992c11f76402c4007c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Nov 2017 22:36:46 +0700 Subject: [PATCH 04/41] [ChangeLog] Actualize --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index d33a710fb..3cbbdda97 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version + +Core ++ [extractor/common] Add protocol for f4m formats +* 
[f4m] Prefer baseURL for relative URLs (#14660) +* [extractor/common] Respect URL query in _extract_wowza_formats (14645) + +Extractors ++ [hotstar:playlist] Add support for playlists (#12465) +* [hotstar] Bypass geo restriction (#14672) +- [22tracks] Remove extractor (#11024, #14628) ++ [skysport] Sdd support ooyala videos protected with embed_token (#14641) +* [gamespot] Extract formats referenced with new data fields (#14652) +* [spankbang] Detect unavailable videos (#14644) + + version 2017.10.29 Core From f34b841b51be6872914ffe17b210c54b0d823c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Nov 2017 22:39:24 +0700 Subject: [PATCH 05/41] release 2017.11.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 881475878..be6e6ddab 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.29 +[debug] youtube-dl version 2017.11.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 3cbbdda97..8af368274 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.11.06 Core + [extractor/common] Add protocol for f4m formats diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7b8e7403a..6009df571 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -3,8 +3,6 @@ - **1up.com** - **20min** - **220.ro** - - **22tracks:genre** - - **22tracks:track** - **24video** - **3qsdn**: 3Q SDN - **3sat** @@ -342,6 +340,7 @@ - **HornBunny** - **HotNewHipHop** - **HotStar** + - **hotstar:playlist** - **Howcast** - **HowStuffWorks** - **HRTi** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 43f080bc3..8b67d23fe 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.29' +__version__ = '2017.11.06' From 
cc6a960e134614f8af2a42dcd8bf146d63638a3c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 8 Nov 2017 20:30:05 +0100 Subject: [PATCH 06/41] use older login method(closes #11572) --- youtube_dl/extractor/crunchyroll.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8bdaf0c2c..18ef3da10 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -43,6 +43,17 @@ class CrunchyrollBaseIE(InfoExtractor): if username is None: return + self._download_webpage( + 'https://www.crunchyroll.com/?a=formhandler', + None, 'Logging in', 'Wrong login info', + data=urlencode_postdata({ + 'formname': 'RpcApiUser_Login', + 'next_url': 'https://www.crunchyroll.com/acct/membership', + 'name': username, + 'password': password, + })) + + ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') @@ -86,6 +97,7 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') + ''' def _real_initialize(self): self._login() From 4222346fb2f42af10ac902cd46469d23923cf114 Mon Sep 17 00:00:00 2001 From: hcwhan Date: Tue, 7 Nov 2017 17:59:09 +0800 Subject: [PATCH 07/41] [pandatv] Update API URL and sign format URLs --- youtube_dl/extractor/pandatv.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py index c86d70771..c99a1bb1f 100644 --- a/youtube_dl/extractor/pandatv.py +++ b/youtube_dl/extractor/pandatv.py @@ -6,6 +6,7 @@ from ..utils import ( ExtractorError, qualities, ) +import json class PandaTVIE(InfoExtractor): @@ -33,7 +34,7 @@ class PandaTVIE(InfoExtractor): video_id = self._match_id(url) config = self._download_json( - 'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id) + 'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id) error_code = config.get('errno', 0) if error_code is not 0: @@ -66,6 +67,11 @@ class PandaTVIE(InfoExtractor): plflag1 = '4' live_panda = 'live_panda' if plflag0 < 1 else '' + plflag_auth = json.loads(video_info["plflag_list"]) + sign = plflag_auth["auth"]["sign"] + ts = plflag_auth["auth"]["time"] + rid = plflag_auth["auth"]["rid"] + quality_key = qualities(['OD', 'HD', 'SD']) suffix = ['_small', '_mid', ''] formats = [] @@ -77,8 +83,8 @@ class PandaTVIE(InfoExtractor): continue for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))): formats.append({ - 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s' - % (pl, plflag1, room_key, live_panda, suffix[quality], ext), + 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s' + % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid), 'format_id': '%s-%s' % (k, ext), 'quality': quality, 'source_preference': pref, From 61fb07e156671159353ae19a152926cab277ac87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 9 Nov 2017 23:25:43 +0700 Subject: [PATCH 08/41] [pandatv] Modernize (closes #14693) --- youtube_dl/extractor/pandatv.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py index c99a1bb1f..13a2e7efc 100644 --- a/youtube_dl/extractor/pandatv.py +++ b/youtube_dl/extractor/pandatv.py @@ -6,7 +6,6 @@ from ..utils import ( ExtractorError, qualities, ) -import json class PandaTVIE(InfoExtractor): @@ -67,10 
+66,10 @@ class PandaTVIE(InfoExtractor): plflag1 = '4' live_panda = 'live_panda' if plflag0 < 1 else '' - plflag_auth = json.loads(video_info["plflag_list"]) - sign = plflag_auth["auth"]["sign"] - ts = plflag_auth["auth"]["time"] - rid = plflag_auth["auth"]["rid"] + plflag_auth = self._parse_json(video_info['plflag_list'], video_id) + sign = plflag_auth['auth']['sign'] + ts = plflag_auth['auth']['time'] + rid = plflag_auth['auth']['rid'] quality_key = qualities(['OD', 'HD', 'SD']) suffix = ['_small', '_mid', ''] From a9543e37c8e460e69a8556c8e5004ebd8e9b4da4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 11 Nov 2017 00:29:08 +0800 Subject: [PATCH 09/41] [wsj] Recognize another URL pattern (closes #14704) --- ChangeLog | 6 ++++++ youtube_dl/extractor/wsj.py | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 8af368274..cedab4723 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [wsj] Recognize another URL pattern (#14704) + + version 2017.11.06 Core diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index 9b5487710..67236f377 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -13,7 +13,7 @@ class WSJIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=| - https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/| + https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+| wsj: ) (?P[a-fA-F0-9-]{36}) @@ -38,6 +38,9 @@ class WSJIE(InfoExtractor): }, { 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html', 'only_matching': True, + }, { + 'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9', + 'only_matching': True, }] def _real_extract(self, url): From 59d2e6d04f621f41a72a232b8c93250991b4ae5c Mon Sep 17 00:00:00 2001 From: gkoelln Date: Fri, 10 Nov 2017 15:59:48 -0600 Subject: [PATCH 10/41] [cartoonnetwork] Update tokenizer_src (closes #14666) --- youtube_dl/extractor/cartoonnetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 086ec90c9..6aeebd7b3 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE): 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { 'secure': { 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', - 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', + 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile', }, }, { 'url': url, From a5203935d6cb753bafaf67164553027b62c01781 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 11 Nov 2017 12:41:15 +0100 Subject: [PATCH 11/41] [gamespot] skip Brightcove Once http formats(#14652) --- youtube_dl/extractor/gamespot.py | 3 ++- youtube_dl/extractor/once.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 6d177cbaf..be1ed8b4a 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -108,7 +108,8 @@ class GameSpotIE(OnceIE): onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') if onceux_url: 
formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url))) + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url), + skip_http_formats=True)) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index a637c8ecf..6ba6fe5d3 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -11,7 +11,7 @@ class OnceIE(InfoExtractor): ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' - def _extract_once_formats(self, url): + def _extract_once_formats(self, url, skip_http_formats=False): domain_id, application_id, media_item_id = re.match( OnceIE._VALID_URL, url).groups() formats = self._extract_m3u8_formats( @@ -27,7 +27,7 @@ class OnceIE(InfoExtractor): rendition_id = self._search_regex( r'/now/media/playlist/[^/]+/[^/]+/([^/]+)', adaptive_format['url'], 'redition id', default=None) - if rendition_id: + if rendition_id and not skip_http_formats: progressive_format = adaptive_format.copy() progressive_format.update({ 'url': self.PROGRESSIVE_URL_TEMPLATE % ( From 79d1f8ed6803b6097f0f3cd57f72e0378bdc1f34 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 11 Nov 2017 13:02:39 +0100 Subject: [PATCH 12/41] [gamespot] add support for article URLS(closes #14652) --- youtube_dl/extractor/gamespot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index be1ed8b4a..e6d6d9b1d 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P\d+)' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', From e4d9586562d24cbbea6ee07162290ec602399f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Nov 2017 20:49:03 +0700 Subject: [PATCH 13/41] Remove sensitive data from logging in messages --- youtube_dl/extractor/animeondemand.py | 2 +- youtube_dl/extractor/atresplayer.py | 2 +- youtube_dl/extractor/bambuser.py | 2 +- youtube_dl/extractor/dramafever.py | 2 +- youtube_dl/extractor/funimation.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/pluralsight.py | 2 +- youtube_dl/extractor/roosterteeth.py | 2 +- youtube_dl/extractor/safari.py | 2 +- youtube_dl/extractor/twitch.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/viki.py | 2 +- youtube_dl/extractor/vk.py | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 69d363311..34c2b363e 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -78,7 +78,7 @@ class AnimeOnDemandIE(InfoExtractor): post_url = urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, + post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._LOGIN_URL, }) diff --git a/youtube_dl/extractor/atresplayer.py 
b/youtube_dl/extractor/atresplayer.py index 01fa308ff..1a31ebe08 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') error = self._html_search_regex( r'(?s)]+class="[^"]*\blist_error\b[^"]*">(.+?)', diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 0eb1930c2..633c57553 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor): self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') login_error = self._html_search_regex( r'(?s)
<div class="messages error">(.+?)</div>
', diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 95883a037..6b60e542b 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -54,7 +54,7 @@ class DramaFeverBaseIE(AMPIE): request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form)) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 'Logging in') if all(logout_pattern not in response for logout_pattern in ['href="/accounts/logout/"', '>Log out<']): diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 8c37509ec..107f658ba 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor): try: data = self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in as %s' % username, data=urlencode_postdata({ + None, 'Logging in', data=urlencode_postdata({ 'username': username, 'password': password, })) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 8b83e1f76..a9f9b10c4 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -70,7 +70,7 @@ class NocoIE(InfoExtractor): return login = self._download_json( - self._LOGIN_URL, None, 'Logging in as %s' % username, + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata({ 'a': 'login', 'cookie': '1', diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index a6a2c273f..d4b1d34ca 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor): 'https://www.patreon.com/processLogin', compat_urllib_parse_urlencode(login_form).encode('utf-8') ) - login_page = self._download_webpage(request, None, note='Logging in as %s' % username) + login_page = self._download_webpage(request, None, note='Logging in') if re.search(r'onLoginFailed', login_page): raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index f6a9131b1..4bf0aa786 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, + post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={'Content-Type': 'application/x-www-form-urlencoded'}) diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py index 46dfc78f5..8b703800e 100644 --- a/youtube_dl/extractor/roosterteeth.py +++ b/youtube_dl/extractor/roosterteeth.py @@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor): login_request = self._download_webpage( self._LOGIN_URL, None, - note='Logging in as %s' % username, + note='Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._LOGIN_URL, diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 909a6ba97..cc6698f88 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor): request = sanitized_Request( self._LOGIN_URL, urlencode_postdata(login_form), headers=headers) login_page = self._download_webpage( - request, None, 'Logging in as %s' % username) + request, None, 
'Logging in') if not is_logged(login_page): raise ExtractorError( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index fefcd2807..bf57eac01 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor): fail(clean_html(login_page)) redirect_page, handle = login_step( - login_page, handle, 'Logging in as %s' % username, { + login_page, handle, 'Logging in', { 'username': username, 'password': password, }) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 207c4a6a7..c248ea727 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor): }) response = self._download_webpage( - self._LOGIN_URL, None, 'Logging in as %s' % username, + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._ORIGIN_URL, diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 853e5c75f..ad2a2a4b7 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor): login = self._call_api( 'sessions.json', None, - 'Logging in as %s' % username, post_data=login_form) + 'Logging in', post_data=login_form) self._token = login.get('token') if not self._token: diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 105e172d5..0d8376522 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor): login_page = self._download_webpage( 'https://login.vk.com/?act=login', None, - note='Logging in as %s' % username, + note='Logging in', data=urlencode_postdata(login_form)) if re.search(r'onLoginFailed', login_page): From af85ce29c61749676ab934a2b297505ab33bf4c7 Mon Sep 17 00:00:00 2001 From: Bob Poekert Date: Sat, 11 Nov 2017 22:25:21 -0800 Subject: [PATCH 14/41] [ccma] Fix typo --- youtube_dl/extractor/ccma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py index 39938c9ac..bec0a825a 100644 --- a/youtube_dl/extractor/ccma.py +++ b/youtube_dl/extractor/ccma.py @@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor): 'description': clean_html(informacio.get('descripcio')), 'duration': duration, 'timestamp': timestamp, - 'thumnails': thumbnails, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From 5fc12b954971f5f63d1e87b05e8b01a9ae0e3b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 12 Nov 2017 18:35:17 +0700 Subject: [PATCH 15/41] [instagram:user] Fix extraction (closes #14699) --- youtube_dl/extractor/instagram.py | 121 +++++++++++++++--------------- 1 file changed, 59 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 4667335e0..20db31f86 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -7,7 +8,6 @@ from ..compat import compat_str from ..utils import ( get_element_by_attribute, int_or_none, - limit_length, lowercase_escape, try_get, ) @@ -212,7 +212,7 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC 
= 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { @@ -221,82 +221,79 @@ class InstagramUserIE(InfoExtractor): 'id': 'porsche', 'title': 'porsche', }, - 'playlist_mincount': 2, - 'playlist': [{ - 'info_dict': { - 'id': '614605558512799803_462752227', - 'ext': 'mp4', - 'title': '#Porsche Intelligent Performance.', - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'Porsche', - 'uploader_id': 'porsche', - 'timestamp': 1387486713, - 'upload_date': '20131219', - }, - }], + 'playlist_count': 5, 'params': { 'extract_flat': True, 'skip_download': True, + 'playlistend': 5, } } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - uploader_id = mobj.group('username') + def _entries(self, uploader_id): + query = { + '__a': 1, + } - entries = [] - page_count = 0 - media_url = 'http://instagram.com/%s/media' % uploader_id - while True: + def get_count(kind): + return int_or_none(try_get( + node, lambda x: x['%ss' % kind]['count'])) + + for page_num in itertools.count(1): page = self._download_json( - media_url, uploader_id, - note='Downloading page %d ' % (page_count + 1), - ) - page_count += 1 + 'https://instagram.com/%s/' % uploader_id, uploader_id, + note='Downloading page %d' % page_num, + fatal=False, query=query) + if not page: + break - for it in page['items']: - if it.get('type') != 'video': + nodes = try_get(page, lambda x: x['user']['media']['nodes'], list) + if not nodes: + break + + max_id = None + + for node in nodes: + node_id = node.get('id') + if node_id: + max_id = node_id + + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + video_id = node.get('code') + if not video_id: continue - like_count = int_or_none(it.get('likes', {}).get('count')) - user = it.get('user', {}) - formats = [{ - 'format_id': k, - 'height': v.get('height'), - 'width': v.get('width'), - 'url': v['url'], - } for k, v in it['videos'].items()] - self._sort_formats(formats) + info = self.url_result( + 'https://instagram.com/p/%s/' % video_id, + ie=InstagramIE.ie_key(), video_id=video_id) - thumbnails_el = it.get('images', {}) - thumbnail = thumbnails_el.get('thumbnail', {}).get('url') + description = try_get( + node, [lambda x: x['caption'], lambda x: x['text']['id']], + compat_str) + thumbnail = node.get('thumbnail_src') or node.get('display_src') + timestamp = int_or_none(node.get('date')) - # In some cases caption is null, which corresponds to None - # in python. 
As a result, it.get('caption', {}) gives None - title = (it.get('caption') or {}).get('text', it['id']) + comment_count = get_count('comment') + like_count = get_count('like') + view_count = int_or_none(node.get('video_views')) - entries.append({ - 'id': it['id'], - 'title': limit_length(title, 80), - 'formats': formats, + info.update({ + 'description': description, 'thumbnail': thumbnail, - 'webpage_url': it.get('link'), - 'uploader': user.get('full_name'), - 'uploader_id': user.get('username'), + 'timestamp': timestamp, + 'comment_count': comment_count, 'like_count': like_count, - 'timestamp': int_or_none(it.get('created_time')), + 'view_count': view_count, }) - if not page['items']: - break - max_id = page['items'][-1]['id'].split('_')[0] - media_url = ( - 'http://instagram.com/%s/media?max_id=%s' % ( - uploader_id, max_id)) + yield info - return { - '_type': 'playlist', - 'entries': entries, - 'id': uploader_id, - 'title': uploader_id, - } + if not max_id: + break + + query['max_id'] = max_id + + def _real_extract(self, url): + uploader_id = self._match_id(url) + return self.playlist_result( + self._entries(uploader_id), uploader_id, uploader_id) From d4e31b72b971172ffdee7fbe3070d20e4454259c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Nov 2017 10:24:35 +0100 Subject: [PATCH 16/41] [gamespot] lower the preference of http formats(#14652) --- youtube_dl/extractor/gamespot.py | 2 +- youtube_dl/extractor/once.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index e6d6d9b1d..a9606a02c 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -109,7 +109,7 @@ class GameSpotIE(OnceIE): if onceux_url: formats.extend(self._extract_once_formats(re.sub( r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url), - skip_http_formats=True)) + http_formats_preference=-1)) if not formats: for quality in ['sd', 'hd']: diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 6ba6fe5d3..8ae5fadd8 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -11,7 +11,7 @@ class OnceIE(InfoExtractor): ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' - def _extract_once_formats(self, url, skip_http_formats=False): + def _extract_once_formats(self, url, http_formats_preference=None): domain_id, application_id, media_item_id = re.match( OnceIE._VALID_URL, url).groups() formats = self._extract_m3u8_formats( @@ -27,7 +27,7 @@ class OnceIE(InfoExtractor): rendition_id = self._search_regex( r'/now/media/playlist/[^/]+/[^/]+/([^/]+)', adaptive_format['url'], 'redition id', default=None) - if rendition_id and not skip_http_formats: + if rendition_id: progressive_format = adaptive_format.copy() progressive_format.update({ 'url': self.PROGRESSIVE_URL_TEMPLATE % ( @@ -35,6 +35,7 @@ class OnceIE(InfoExtractor): 'format_id': adaptive_format['format_id'].replace( 'hls', 'http'), 'protocol': 'http', + 'preference': http_formats_preference, }) progressive_formats.append(progressive_format) self._check_formats(progressive_formats, media_item_id) From 388beb86e0c8e3f76958aa8a258bd396b8b1e0fe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Nov 2017 10:30:12 +0100 Subject: [PATCH 17/41] [gamespot] add test for #14652 --- youtube_dl/extractor/gamespot.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index a9606a02c..ab647dd41 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -38,6 +38,9 @@ class GameSpotIE(OnceIE): }, { 'url': 'https://www.gamespot.com/videos/embed/6439218/', 'only_matching': True, + }, { + 'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/', + 'only_matching': True, }] def _real_extract(self, url): From 27adc9ec65be412e07f6e55d9d9b56c1c224d1db Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Nov 2017 11:24:15 +0100 Subject: [PATCH 18/41] [tva] fix extraction(closes #14736) --- youtube_dl/extractor/tva.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py index b57abeaa4..0b863df2f 100644 --- a/youtube_dl/extractor/tva.py +++ b/youtube_dl/extractor/tva.py @@ -32,6 +32,8 @@ class TVAIE(InfoExtractor): video_data = self._download_json( 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ 'Accept': 'application/json', + }, query={ + 'appId': '5955fc5f23eec60006c951f1', }) def get_attribute(key): From 05dee6c520eb959316a2e58203cbd5d30e908bc3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Nov 2017 19:15:16 +0100 Subject: [PATCH 19/41] [crunchyroll] extract old rtmp formats --- youtube_dl/extractor/crunchyroll.py | 154 ++++++++++++++++------------ 1 file changed, 91 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 18ef3da10..b53f2d705 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -38,6 +38,16 @@ class CrunchyrollBaseIE(InfoExtractor): _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' + def _call_rpc_api(self, method, video_id, note=None, data=None): + data = data or {} + data['req'] = 'RpcApi' + method + data = compat_urllib_parse_urlencode(data).encode('utf-8') + return self._download_xml( + 'http://www.crunchyroll.com/xml/', + video_id, note, fatal=False, data=data, headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def _login(self): (username, password) = self._get_login_info() if username is None: @@ -377,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text def _get_subtitles(self, video_id, webpage): subtitles = {} for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage): - sub_page = self._download_webpage( - 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, - video_id, note='Downloading subtitles for ' + sub_name) - id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) - iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False) - data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False) - if not id or not iv or not data: + sub_doc = self._call_rpc_api( + 'Subtitle_GetXml', video_id, + 'Downloading subtitles for ' + sub_name, data={ + 'subtitle_script_id': sub_id, + }) + if not sub_doc: continue - subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') + sid = sub_doc.get('id') + iv = xpath_text(sub_doc, 'iv', 'subtitle iv') + data = xpath_text(sub_doc, 'data', 'subtitle data') + if not sid or not iv or not data: + continue + subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8') lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 
'subtitle_lang_code', fatal=False) if not lang_code: continue @@ -456,65 +470,79 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text for fmt in available_fmts: stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' - streamdata_req = sanitized_Request( - 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' - % (video_id, stream_format, stream_quality), - compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) - streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - streamdata = self._download_xml( - streamdata_req, video_id, - note='Downloading media info for %s' % video_format) - stream_info = streamdata.find('./{default}preload/stream_info') - video_encode_id = xpath_text(stream_info, './video_encode_id') - if video_encode_id in video_encode_ids: - continue - video_encode_ids.append(video_encode_id) + stream_infos = [] + streamdata = self._call_rpc_api( + 'VideoPlayer_GetStandardConfig', video_id, + 'Downloading media info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_quality': stream_quality, + 'current_page': url, + }) + if streamdata: + stream_info = streamdata.find('./{default}preload/stream_info') + if stream_info: + stream_infos.append(stream_info) + stream_info = self._call_rpc_api( + 'VideoEncode_GetStreamInfo', video_id, + 'Downloading stream info for %s' % video_format, data={ + 'media_id': video_id, + 'video_format': stream_format, + 'video_encode_quality': stream_quality, + }) + if stream_info: + stream_infos.append(stream_info) + for stream_info in stream_infos: + video_encode_id = xpath_text(stream_info, './video_encode_id') + if video_encode_id in video_encode_ids: + continue + video_encode_ids.append(video_encode_id) - video_file = xpath_text(stream_info, './file') - if not video_file: - continue - if video_file.startswith('http'): - formats.extend(self._extract_m3u8_formats( - video_file, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - continue - - video_url = xpath_text(stream_info, './host') - if not video_url: - continue - metadata = stream_info.find('./metadata') - format_info = { - 'format': video_format, - 'format_id': video_format, - 'height': int_or_none(xpath_text(metadata, './height')), - 'width': int_or_none(xpath_text(metadata, './width')), - } - - if '.fplive.net/' in video_url: - video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) - parsed_video_url = compat_urlparse.urlparse(video_url) - direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( - netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) - if self._is_valid_url(direct_video_url, video_id, video_format): - format_info.update({ - 'url': direct_video_url, - }) - formats.append(format_info) + video_file = xpath_text(stream_info, './file') + if not video_file: + continue + if video_file.startswith('http'): + formats.extend(self._extract_m3u8_formats( + video_file, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) continue - format_info.update({ - 'url': video_url, - 'play_path': video_file, - 'ext': 'flv', - }) - formats.append(format_info) - self._sort_formats(formats) + video_url = xpath_text(stream_info, './host') + if not video_url: + continue + metadata = stream_info.find('./metadata') + format_info = { + 'format': video_format, + 'height': 
int_or_none(xpath_text(metadata, './height')), + 'width': int_or_none(xpath_text(metadata, './width')), + } - metadata = self._download_xml( - 'http://www.crunchyroll.com/xml', video_id, - note='Downloading media info', query={ - 'req': 'RpcApiVideoPlayer_GetMediaMetadata', + if '.fplive.net/' in video_url: + video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) + parsed_video_url = compat_urlparse.urlparse(video_url) + direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( + netloc='v.lvlt.crcdn.net', + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) + if self._is_valid_url(direct_video_url, video_id, video_format): + format_info.update({ + 'format_id': 'http-' + video_format, + 'url': direct_video_url, + }) + formats.append(format_info) + continue + + format_info.update({ + 'format_id': 'rtmp-' + video_format, + 'url': video_url, + 'play_path': video_file, + 'ext': 'flv', + }) + formats.append(format_info) + self._sort_formats(formats, ('height', 'width', 'tbr', 'fps')) + + metadata = self._call_rpc_api( + 'VideoPlayer_GetMediaMetadata', video_id, + note='Downloading media info', data={ 'media_id': video_id, }) From 5871ebac473e723376722a37baecf51d6ae7d781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 01:43:20 +0700 Subject: [PATCH 20/41] [YoutubeDL] Fix playlist range optimization for --playlist-items (closes #14740) --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 342d6b47c..68721e9ab 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -948,7 +948,8 @@ class YoutubeDL(object): report_download(n_entries) else: # iterable if playlistitems: - entries = make_playlistitems_entries(list(ie_entries)) + entries = make_playlistitems_entries(list(itertools.islice( + ie_entries, 0, max(playlistitems)))) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) From 0987f2ddb27a27506c697ad9dae2ccbf24fc786d Mon Sep 17 00:00:00 2001 From: Timendum Date: Tue, 14 Nov 2017 16:34:45 +0100 Subject: [PATCH 21/41] [vshare] Fix extraction (closes #14473) --- youtube_dl/extractor/generic.py | 16 +++++++++++++++ youtube_dl/extractor/vshare.py | 36 +++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2a9c3e2de..31564e550 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -102,6 +102,7 @@ from .joj import JojIE from .megaphone import MegaphoneIE from .vzaar import VzaarIE from .channel9 import Channel9IE +from .vshare import VShareIE class GenericIE(InfoExtractor): @@ -1921,6 +1922,16 @@ class GenericIE(InfoExtractor): 'title': 'Rescue Kit 14 Free Edition - Getting started', }, 'playlist_count': 4, + }, + { + # vshare embed + 'url': 'https://youtube-dl-demo.neocities.org/vshare.html', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', + 'info_dict': { + 'id': '0f64ce6', + 'title': 'vl14062007715967', + 'ext': 'mp4', + } } # { # # TODO: find another test @@ -2879,6 +2890,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) + vshare_urls = VShareIE._extract_urls(webpage) + if vshare_urls: + return self.playlist_from_matches( + vshare_urls, video_id, video_title, ie=VShareIE.ie_key()) + def merge_dicts(dict1, dict2): merged = {} for k, v in 
dict1.items(): diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 5addbc280..ea39a9051 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -1,14 +1,18 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_chr +from ..utils import decode_packed_codes class VShareIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://vshare.io/d/0f64ce6', - 'md5': '16d7b8fef58846db47419199ff1ab3e7', + 'md5': '17b39f55b5497ae8b59f5fbce8e35886', 'info_dict': { 'id': '0f64ce6', 'title': 'vl14062007715967', @@ -19,20 +23,36 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + def _extract_packed(self, webpage): + packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') + unpacked = decode_packed_codes(packed) + digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') + digits = digits.split(',') + digits = [int(digit) for digit in digits] + key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + chars = [compat_chr(d - int(key_digit)) for d in digits] + return ''.join(chars) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/d/%s' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) - title = self._html_search_regex( - r'(?s)
(.+?)
', webpage, 'title') - video_url = self._search_regex( - r']+href=(["\'])(?P(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here', - webpage, 'video url', group='url') + title = self._html_search_regex(r'([^<]+)', webpage, 'title') + title = title.split(' - ')[0] + unpacked = self._extract_packed(webpage) + video_urls = re.findall(r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) From ff31f2d5c3750364b013a9bf59b85cebd0cee1fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 22:39:54 +0700 Subject: [PATCH 22/41] [vshare] Capture and output error message --- youtube_dl/extractor/vshare.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index ea39a9051..20ce22e16 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from ..compat import compat_chr -from ..utils import decode_packed_codes +from ..utils import ( + decode_packed_codes, + ExtractorError, +) class VShareIE(InfoExtractor): @@ -42,6 +45,12 @@ class VShareIE(InfoExtractor): title = self._html_search_regex(r'([^<]+)', webpage, 'title') title = title.split(' - ')[0] + error = self._html_search_regex( + r'(?s)]+\bclass=["\']xxx-error[^>]+>(.+?) Date: Tue, 14 Nov 2017 22:49:25 +0700 Subject: [PATCH 23/41] [vshare] Improve extraction, fix formats sorting and carry long lines --- youtube_dl/extractor/vshare.py | 41 ++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py index 20ce22e16..e4ec77889 100644 --- a/youtube_dl/extractor/vshare.py +++ b/youtube_dl/extractor/vshare.py @@ -26,13 +26,20 @@ class VShareIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', + webpage) + def _extract_packed(self, webpage): - packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code') + packed = self._search_regex( + r'(eval\(function.+)', webpage, 'packed code') unpacked = decode_packed_codes(packed) digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits') - digits = digits.split(',') - digits = [int(digit) for digit in digits] - key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') + digits = [int(digit) for digit in digits.split(',')] + key_digit = self._search_regex( + r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit') chars = [compat_chr(d - int(key_digit)) for d in digits] return ''.join(chars) @@ -40,9 +47,11 @@ class VShareIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id) + 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, + video_id) - title = self._html_search_regex(r'([^<]+)', webpage, 'title') + title = self._html_search_regex( + r'([^<]+)', webpage, 'title') title = title.split(' - ')[0] error = self._html_search_regex( @@ -51,17 +60,15 @@ class VShareIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) - unpacked = self._extract_packed(webpage) - video_urls = re.findall(r'%s' % self._extract_packed(webpage), + video_id)[0] + + self._sort_formats(info['formats']) + + info.update({ 'id': video_id, 'title': title, - 'formats': formats, - } + }) - @staticmethod - def _extract_urls(webpage): - 
return re.findall( - r']+?src=["\'](?P(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)', - webpage) + return info From ea2295842f79c9efff3a9abce1d0eee7de4953d6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 14 Nov 2017 17:41:30 +0100 Subject: [PATCH 24/41] [common] skip Apple FairPlay m3u8 manifests(closes #14741) --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e2d9f52b0..a9d68fc0c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1356,6 +1356,9 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return [] + if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay + return [] + formats = [] format_url = lambda u: ( From fae0eb42ec4309fe7fb8476d30621ba1d60fa168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Nov 2017 23:59:30 +0700 Subject: [PATCH 25/41] [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index cedab4723..6ed0f011f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,25 @@ version +Core +* [common] Skip Apple FairPlay m3u8 manifests (#14741) +* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740) + Extractors +* [vshare] Capture and output error message +* [vshare] Fix extraction (#14473) +* [crunchyroll] Extract old RTMP formats +* [tva] Fix extraction (#14736) +* [gamespot] Lower preference of HTTP formats (#14652) +* [instagram:user] Fix extraction (#14699) +* [ccma] Fix typo (#14730) +- Remove sensitive data from logging in messages +* [instagram:user] Fix extraction (#14699) ++ [gamespot] Add support for article URLs (#14652) +* [gamespot] Skip Brightcove Once HTTP formats (#14652) +* [cartoonnetwork] Update tokenizer_src (#14666) + [wsj] Recognize another URL pattern (#14704) +* [pandatv] Update API URL and sign format URLs (#14693) +* [crunchyroll] Use old login method (#11572) version 2017.11.06 From 08e45b39e76419f63aa43d5008257789d8a30bf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Nov 2017 00:15:42 +0700 Subject: [PATCH 26/41] release 2017.11.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index be6e6ddab..4dd1a6e59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.11.06 +[debug] youtube-dl version 2017.11.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6ed0f011f..6b4befb8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.11.15 Core * [common] Skip Apple FairPlay m3u8 manifests (#14741) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8b67d23fe..1c3cbefeb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.11.06' +__version__ = '2017.11.15' From 9cbd4dda10ad248a5268ec1e0e563cf97024a8b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Nov 2017 22:14:54 +0700 Subject: [PATCH 27/41] [instagram] Fix description, timestamp and counters extraction (closes #14755) --- youtube_dl/extractor/instagram.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 20db31f86..a77f619d2 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor): video_url = media.get('video_url') height = int_or_none(media.get('dimensions', {}).get('height')) width = int_or_none(media.get('dimensions', {}).get('width')) - description = media.get('caption') + description = try_get( + media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], + compat_str) or media.get('caption') thumbnail = media.get('display_src') - timestamp = int_or_none(media.get('date')) + timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) uploader = media.get('owner', {}).get('full_name') uploader_id = media.get('owner', {}).get('username') - like_count = int_or_none(media.get('likes', {}).get('count')) - comment_count = int_or_none(media.get('comments', {}).get('count')) + + def get_count(key, kind): + return int_or_none(try_get( + media, (lambda x: x['edge_media_%s' % key]['count'], + lambda x: x['%ss' % kind]['count']))) + like_count = get_count('preview_like', 'like') + comment_count = get_count('to_comment', 'comment') + comments = [{ 'author': comment.get('user', {}).get('username'), 'author_id': comment.get('user', {}).get('id'), From 3192d4bc7a063983c3a82bc4320c16d65679307a Mon Sep 17 00:00:00 2001 From: Windom Date: Thu, 16 Nov 2017 20:05:04 +0200 Subject: [PATCH 28/41] [spankbang] Add support for mobile URLs and fix test --- youtube_dl/extractor/spankbang.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2863e53b5..e6c2dcfc4 100644 --- a/youtube_dl/extractor/spankbang.py +++ 
b/youtube_dl/extractor/spankbang.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P[\da-z]+)/video' + _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P[\da-z]+)/video' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -15,7 +15,7 @@ class SpankBangIE(InfoExtractor): 'id': '3vvn', 'ext': 'mp4', 'title': 'fantasy solo', - 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', + 'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, @@ -28,6 +28,10 @@ class SpankBangIE(InfoExtractor): # no uploader 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', 'only_matching': True, + }, { + # mobile page + 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', + 'only_matching': True, }] def _real_extract(self, url): From 38db52adf35c2134444e5b6b601e9567797e9195 Mon Sep 17 00:00:00 2001 From: Windom Date: Thu, 16 Nov 2017 20:50:07 +0200 Subject: [PATCH 29/41] [drtuber] Add support for mobile URLs --- youtube_dl/extractor/drtuber.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index c5d56a9ad..c88b3126b 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -10,7 +10,7 @@ from ..utils import ( class DrTuberIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' + _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P\d+)(?:/(?P[\w-]+))?' 
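(For reference, not part of the patch: the only change needed for the mobile links is the broadened host alternation in the _VALID_URL just above. A quick illustrative check, assuming the extractor's usual id/display_id group names, which the diff rendering here has stripped:)

    import re

    _VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'

    # Both the desktop test URL and the new mobile test URL should match
    # and yield the numeric video id.
    for url in (
            'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
            'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen'):
        mobj = re.match(_VALID_URL, url)
        assert mobj is not None and mobj.group('id')
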
_TESTS = [{ 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'md5': '93e680cf2536ad0dfb7e74d94a89facd', @@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor): }, { 'url': 'http://www.drtuber.com/embed/489939', 'only_matching': True, + }, { + 'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen', + 'only_matching': True, }] @staticmethod From f610dbb05f8d17cc95437958835a437c3777b38c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Nov 2017 19:02:56 +0700 Subject: [PATCH 30/41] [extractor/common] Use final URL when dumping request (closes #14769) --- youtube_dl/extractor/common.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a9d68fc0c..8e4ee0deb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -592,19 +592,11 @@ class InfoExtractor(object): if not encoding: encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self._downloader.params.get('dump_intermediate_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - self.to_screen('Dumping request to ' + url) + self.to_screen('Dumping request to ' + urlh.geturl()) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self._downloader.params.get('write_pages', False): - try: - url = url_or_request.get_full_url() - except AttributeError: - url = url_or_request - basen = '%s_%s' % (video_id, url) + basen = '%s_%s' % (video_id, urlh.geturl()) if len(basen) > 240: h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() basen = basen[:240 - len(h)] + h From a9efdf3d4a18ec5657ea50f31715e1b88a945820 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 19 Nov 2017 12:59:31 +0100 Subject: [PATCH 31/41] [livestream] make smil extraction non fatal(fixes #14792) --- youtube_dl/extractor/livestream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 317ebbc4e..c4776bbf3 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -114,7 +114,7 @@ class LivestreamIE(InfoExtractor): smil_url = video_data.get('smil_url') if smil_url: - formats.extend(self._extract_smil_formats(smil_url, video_id)) + formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False)) m3u8_url = video_data.get('m3u8_url') if m3u8_url: From 8f639411042d35cd3be6eeff485e3015bafce4d7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 22 Nov 2017 22:49:48 +0800 Subject: [PATCH 32/41] [youku] Fix extraction; update ccode (closes #14815) --- ChangeLog | 6 ++++++ youtube_dl/extractor/youku.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 6b4befb8f..e3b7750f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [youku] Fix extraction; update ccode (#14815) + + version 2017.11.15 Core diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 0c4bc2eda..6822a30bc 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor): # request basic data basic_data_params = { 'vid': video_id, - 'ccode': '0402' if 'tudou.com' in url else '0401', + 'ccode': '0502', 'client_ip': '192.168.1.1', 'utid': cna, 'client_ts': time.time() / 1000, From 
2688664762f406b1ba2913af25ee3a2d2ba58038 Mon Sep 17 00:00:00 2001 From: enigmaquip Date: Wed, 22 Nov 2017 16:39:11 -0700 Subject: [PATCH 33/41] [culturebox] Fix extraction (closes #14827) --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 2bcbb3e39..037e538cc 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -363,6 +363,6 @@ class CultureboxIE(FranceTVBaseInfoExtractor): raise ExtractorError('Video %s is not available' % name, expected=True) video_id, catalogue = self._search_regex( - r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') + r'"https?://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') return self._extract_video(video_id, catalogue) From 939be9adfe810ada7dbd5e9032bcfec19fafa14b Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 22 Nov 2017 11:47:02 -0500 Subject: [PATCH 34/41] [JWPlatform] Support iframes Support content.jwplatform... src attributes inside
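
(The JWPlatform patch is truncated here. For context: generic embed discovery in this extractor works by scanning the embedding page for content.jwplatform.com player URLs carried in tag src attributes. A minimal sketch of how iframe support might look, assuming the existing <script>-based pattern is simply widened to also accept <iframe> tags; the helper name and exact regex below are illustrative, not necessarily what the extractor uses:)

    import re

    def extract_jwplatform_embed_urls(webpage):
        # Find content.jwplatform.com player URLs in <script> or <iframe>
        # src attributes; the trailing 8 characters are the player/media id.
        return re.findall(
            r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
            webpage)

(An <iframe> embed pointing at //content.jwplatform.com/players/<8-char id> would then be picked up the same way the existing <script> embeds are.)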