From bd65f181532ab4e535b408d3ccf99723534eb326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 24 Jun 2017 18:33:31 +0700 Subject: [PATCH 01/10] [onetpl] Add support for videos embedded via pulsembed (closes #13482) --- youtube_dl/extractor/onet.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 94f57990b..58da1bc27 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -11,6 +11,7 @@ from ..utils import ( get_element_by_class, int_or_none, js_to_json, + NO_DEFAULT, parse_iso8601, remove_start, strip_or_none, @@ -198,6 +199,19 @@ class OnetPlIE(InfoExtractor): 'upload_date': '20170214', 'timestamp': 1487078046, }, + }, { + # embedded via pulsembed + 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0', + 'info_dict': { + 'id': '501235.965429946', + 'ext': 'mp4', + 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu', + 'upload_date': '20170622', + 'timestamp': 1498159955, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', 'only_matching': True, @@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor): 'only_matching': True, }] + def _search_mvp_id(self, webpage, default=NO_DEFAULT): + return self._search_regex( + r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id', + default=default) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - mvp_id = self._search_regex( - r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id') + mvp_id = self._search_mvp_id(webpage, default=None) + + if not mvp_id: + pulsembed_url = self._search_regex( + r'data-src=(["\'])(?P(?:https?:)?//pulsembed\.eu/.+?)\1', + webpage, 'pulsembed url', group='url') + webpage = self._download_webpage( + pulsembed_url, video_id, 'Downloading pulsembed webpage') + mvp_id = self._search_mvp_id(webpage) return self.url_result( 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id) From 27449ad894f7d49e189686a15399ea66cfb63667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Jun 2017 01:09:12 +0700 Subject: [PATCH 02/10] [redbulltv] Add support for lives and segments (closes #13486)) --- youtube_dl/extractor/redbulltv.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index afab62426..c5918afee 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -13,7 +13,7 @@ from ..utils import ( class RedBullTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?PAP-\w+)' + _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?PAP-\w+)' _TESTS = [{ # film 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', @@ -42,6 +42,22 @@ class RedBullTVIE(InfoExtractor): 'season_number': 2, 'episode_number': 4, }, + 'params': { + 'skip_download': True, + }, + }, { + # segment + 'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals', + 'info_dict': { + 'id': 'AP-1QSAQJ6V52111', + 'ext': 'mp4', + 'title': 'Semi Finals - Vans Park Series Pro Tour', + 'description': 'md5:306a2783cdafa9e65e39aa62f514fd97', + 'duration': 11791.991, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion', 'only_matching': True, From 23aec3d623146d06535a4f5388693c1e9a2bbfde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Jun 2017 01:10:31 +0700 Subject: [PATCH 03/10] [redbulltv] Restore hls format prefix --- youtube_dl/extractor/redbulltv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index c5918afee..5d6cc3610 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -98,7 +98,8 @@ class RedBullTVIE(InfoExtractor): title = info['title'].strip() formats = self._extract_m3u8_formats( - video['url'], video_id, 'mp4', 'm3u8_native') + video['url'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') self._sort_formats(formats) subtitles = {} From 449c66577640a0c3f0b383204a1e7284429a61c3 Mon Sep 17 00:00:00 2001 From: james Date: Sat, 17 Jun 2017 17:15:41 +0200 Subject: [PATCH 04/10] [raiplay:live] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rai.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e97691daa..a263c88b3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -824,6 +824,7 @@ from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import ( RaiPlayIE, + RaiPlayLiveIE, RaiIE, ) from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 81eb9db85..ed15a5f10 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -208,10 +208,27 @@ class RaiPlayIE(RaiBaseIE): } info.update(relinker_info) - return info +class RaiPlayLiveIE(RaiBaseIE): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P\w*)' + _TEST = { + 'url': 'http://www.raiplay.it/dirette/rai3', + 'only_matching': True, + } + + def _real_extract(self, url): + channel = self._match_id(url) + + webpage = self._download_webpage(url, channel) + re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE + video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') + + return self.url_result('http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, + RaiPlayIE.ie_key(), video_id) + + class RaiIE(RaiBaseIE): _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ From 9c48b5a193de754403f4a1ced78f6cf6b3893676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Jun 2017 01:48:02 +0700 Subject: [PATCH 05/10] [raiplay:live] Improve and add test (closes #13414) --- youtube_dl/extractor/rai.py | 44 +++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index ed15a5f10..e11bf8f9a 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -191,11 +191,12 @@ class RaiPlayIE(RaiBaseIE): info = { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if relinker_info.get( + 'is_live') else title, 'alt_title': media.get('subtitle'), 'description': media.get('description'), - 'uploader': media.get('channel'), - 'creator': media.get('editor'), + 'uploader': strip_or_none(media.get('channel')), + 'creator': strip_or_none(media.get('editor')), 'duration': parse_duration(video.get('duration')), 'timestamp': timestamp, 'thumbnails': thumbnails, @@ -212,21 +213,40 @@ class RaiPlayIE(RaiBaseIE): class RaiPlayLiveIE(RaiBaseIE): - _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P\w*)' + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P[^/?#&]+)' _TEST = { - 'url': 'http://www.raiplay.it/dirette/rai3', - 'only_matching': True, + 'url': 'http://www.raiplay.it/dirette/rainews24', + 'info_dict': { + 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:6eca31500550f9376819f174e5644754', + 'uploader': 'Rai News 24', + 'creator': 'Rai News 24', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): - channel = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, channel) - re_id = r']*)data-uniquename=(["\'])[\w-]*(?P%s)(\2)([^>]*?)>' % RaiBaseIE._UUID_RE - video_id = self._html_search_regex(re_id, webpage, 'livestream-id', group='id') + webpage = self._download_webpage(url, display_id) - return self.url_result('http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, - RaiPlayIE.ie_key(), video_id) + video_id = self._search_regex( + r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE, + webpage, 'content id') + + return { + '_type': 'url_transparent', + 'ie_key': RaiPlayIE.ie_key(), + 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, + 'id': video_id, + 'display_id': display_id, + } class RaiIE(RaiBaseIE): From 5744cf6c03e6f89914d44be7c5d77fca3b121bef Mon Sep 17 00:00:00 2001 From: Argn0 Date: Sat, 24 Jun 2017 20:59:15 +0200 Subject: [PATCH 06/10] [ign] Add another video id pattern (closes #13328) --- youtube_dl/extractor/ign.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index c45c68c1d..c1367cf51 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -89,6 +89,11 @@ class IGNIE(InfoExtractor): 'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds', 'only_matching': True, }, + { + # videoId pattern + 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned', + 'only_matching': True, + }, ] def _find_video_id(self, webpage): @@ -98,6 +103,8 @@ class IGNIE(InfoExtractor): r'data-video-id="(.+?)"', r' Date: Sun, 25 Jun 2017 02:14:10 +0700 Subject: [PATCH 07/10] [wsj] Add support for barrons.com (closes #13470) --- youtube_dl/extractor/wsj.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index 45cfca7c5..9b5487710 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -13,7 +13,7 @@ class WSJIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=| - https?://(?:www\.)?wsj\.com/video/[^/]+/| + https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/| wsj: ) (?P[a-fA-F0-9-]{36}) @@ -35,6 +35,9 @@ class WSJIE(InfoExtractor): }, { 'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html', 'only_matching': True, + }, { + 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html', + 'only_matching': True, }] def _real_extract(self, url): From 0c7a631b613fe8ec443e6b17ea8f7a17fb8abb5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Jun 2017 04:54:56 +0700 Subject: [PATCH 08/10] [adobepass] Add support for ATTOTT MSO (DIRECTV NOW) (closes #13472) --- youtube_dl/extractor/adobepass.py | 43 ++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 3dfc632e7..b83b51efb 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -15,6 +15,7 @@ from ..utils import ( urlencode_postdata, unified_timestamp, ExtractorError, + NO_DEFAULT, ) @@ -24,6 +25,11 @@ MSO_INFO = { 'username_field': 'username', 'password_field': 'password', }, + 'ATTOTT': { + 'name': 'DIRECTV NOW', + 'username_field': 'email', + 'password_field': 'loginpassword', + }, 'Rogers': { 'name': 'Rogers', 'username_field': 'UserName', @@ -1316,6 +1322,8 @@ class AdobePassIE(InfoExtractor): _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' _MVPD_CACHE = 'ap-mvpd' + _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' + def _download_webpage_handle(self, *args, **kwargs): headers = kwargs.get('headers', {}) headers.update(self.geo_verification_headers()) @@ -1365,6 +1373,21 @@ class AdobePassIE(InfoExtractor): 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) + def extract_redirect_url(html, url=None, fatal=False): + # TODO: eliminate code duplication with generic extractor and move + # redirection code into _download_webpage_handle + REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' + redirect_url = self._search_regex( + r'(?i) Date: Sun, 25 Jun 2017 05:13:12 +0700 Subject: [PATCH 09/10] [ChangeLog] Actualize --- ChangeLog | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index 746250db9..d3343c760 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +version + +Core ++ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472) +* [YoutubeDL] Skip malformed formats for better extraction robustness + +Extractors ++ [wsj] Add support for barrons.com (#13470) ++ [ign] Add another video id pattern (#13328) ++ [raiplay:live] Add support for live streams (#13414) ++ [redbulltv] Add support for live videos and segments (#13486) ++ [onetpl] Add support for videos embedded via pulsembed (#13482) +* [ooyala] Make more robust +* [ooyala] Skip empty format URLs (#13471, #13476) +* [hgtv.com:show] Fix typo + + version 2017.06.23 Core From a7ce8f16c4ee4b8f567d943ada8ade8a50f99860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Jun 2017 05:16:06 +0700 Subject: [PATCH 10/10] release 2017.06.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4e7ceafd8..82bbbdaa6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.06.23 +[debug] youtube-dl version 2017.06.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d3343c760..a0072ffe3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.06.25 Core + [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e827ec0cf..010ff762c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -644,6 +644,7 @@ - **RadioJavan** - **Rai** - **RaiPlay** + - **RaiPlayLive** - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dfb69ab8d..b6d378896 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.06.23' +__version__ = '2017.06.25'