From 37084f6641c07dbe6580b366f330eb4126d18bbe Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 9 Feb 2017 16:24:54 +0100 Subject: [PATCH 01/12] [kaltura] improve embed partner id extraction(fixes #12041) --- youtube_dl/extractor/kaltura.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 5ef382f9f..c6483bcf0 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -27,7 +27,7 @@ class KalturaIE(InfoExtractor): (?: (?: # flash player - index\.php/kwidget| + index\.php/(?:kwidget|extwidget/preview)| # html5 player html5/html5lib/[^/]+/mwEmbedFrame\.php ) @@ -94,6 +94,10 @@ class KalturaIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, + { + 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', + 'only_matching': True, } ] @@ -112,7 +116,7 @@ class KalturaIE(InfoExtractor): re.search( r'''(?xs) (?P["\']) - (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?:(?!(?P=q1)).)*/(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* (?P=q1).*? 
(?: entry_?[Ii]d| @@ -209,6 +213,8 @@ class KalturaIE(InfoExtractor): partner_id = params['wid'][0][1:] elif 'p' in params: partner_id = params['p'][0] + elif 'partner_id' in params: + partner_id = params['partner_id'][0] else: raise ExtractorError('Invalid URL', expected=True) if 'entry_id' in params: From be670b8e8f9c32fe3d37666b28c4889d780d5964 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 9 Feb 2017 17:36:59 +0100 Subject: [PATCH 02/12] [external:ffmpeg] do not assume that ffmpeg unknown version format is new --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 41e37261d..bdd3545a2 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -275,7 +275,7 @@ class FFmpegFD(ExternalFD): args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] From 78ef214d2d8010f2fc7ab451c9b4ae137c2569dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 9 Feb 2017 23:42:40 +0700 Subject: [PATCH 03/12] [facebook] Improve JS data regex (closes #12042) --- youtube_dl/extractor/facebook.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b325c8200..4a3c839f4 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -134,6 +134,20 @@ class FacebookIE(InfoExtractor): 'upload_date': '20161030', 'uploader': 'CNN', }, + }, { + 
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall + 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', + 'info_dict': { + 'id': '1417995061575415', + 'ext': 'mp4', + 'title': 'md5:a7b86ca673f51800cd54687b7f4012fe', + 'timestamp': 1486648217, + 'upload_date': '20170209', + 'uploader': 'Yaroslav Korpan', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -262,7 +276,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) if server_js_data: From e64b0fca147c1512c8d31d02aedefed78411bbd9 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Wed, 8 Feb 2017 13:53:39 +0100 Subject: [PATCH 04/12] [pornhub] Fix extraction (closes #12007) --- youtube_dl/extractor/pornhub.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 017f6c552..5e930f45e 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -156,11 +156,24 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') + video_variables = {} + for video_variablename, quote, video_variable in re.findall( + r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage): + video_variables[video_variablename] = video_variable + + encoded_video_urls = [] + for encoded_video_url in re.findall( + r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage): + encoded_video_urls.append(encoded_video_url) + + # Decode the URLs video_urls = 
[] - for quote, video_url in re.findall( - r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage): - video_urls.append(compat_urllib_parse_unquote(re.sub( - r'{0}\s*\+\s*{0}'.format(quote), '', video_url))) + for url in encoded_video_urls: + for varname, varval in video_variables.items(): + url = url.replace(varname, varval) + url = url.replace('+', '') + url = url.replace(' ', '') + video_urls.append(url) if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse_unquote_plus( From b7f9843bec27d04f66c0656da22137e32fa157ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 00:57:44 +0700 Subject: [PATCH 05/12] [pornhub] Simplify (closes #12018) --- youtube_dl/extractor/pornhub.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 5e930f45e..818d99c1f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -158,22 +158,15 @@ class PornHubIE(InfoExtractor): video_variables = {} for video_variablename, quote, video_variable in re.findall( - r'(player_quality_[0-9]{3,4}p[0-9a-z]+?)=\s*(["\'])(.*?)\2;', webpage): + r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): video_variables[video_variablename] = video_variable - encoded_video_urls = [] - for encoded_video_url in re.findall( - r'player_quality_[0-9]{3,4}p\s*=(.*?);', webpage): - encoded_video_urls.append(encoded_video_url) - - # Decode the URLs video_urls = [] - for url in encoded_video_urls: + for encoded_video_url in re.findall( + r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage): for varname, varval in video_variables.items(): - url = url.replace(varname, varval) - url = url.replace('+', '') - url = url.replace(' ', '') - video_urls.append(url) + encoded_video_url = encoded_video_url.replace(varname, varval) + video_urls.append(re.sub(r'[\s+]', '', encoded_video_url)) if webpage.find('"encrypted":true') 
!= -1: password = compat_urllib_parse_unquote_plus( From 9150d1eb6936e7f3ad168095f57dc0c8f56d2364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 01:03:35 +0700 Subject: [PATCH 06/12] [xtube] Fix extraction (closes #12023) --- youtube_dl/extractor/xtube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 83bc1fef2..91bae7ee7 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -60,7 +60,8 @@ class XTubeIE(InfoExtractor): webpage = self._download_webpage(req, display_id) sources = self._parse_json(self._search_regex( - r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id) + r'(["\'])sources\1\s*:\s*(?P{.+?}),', + webpage, 'sources', group='sources'), video_id) formats = [] for format_id, format_url in sources.items(): @@ -81,10 +82,10 @@ class XTubeIE(InfoExtractor): r']+class="nickname"[^>]*>([^<]+)'), webpage, 'uploader', fatal=False) duration = parse_duration(self._search_regex( - r'
<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>
', + r'
<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>
', webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( - r'
<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>
', + r'
<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>
', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( r'>Comments? \(([\d,\.]+)\)<', From fbc6dc525e525565544b377a1d16cd915cb11a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 01:05:48 +0700 Subject: [PATCH 07/12] [xtube] Fix shortcuts --- youtube_dl/extractor/xtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 91bae7ee7..11717fe98 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -53,7 +53,7 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/watch.php?v=%s' % video_id + url = 'http://www.xtube.com/video-watch/-%s' % video_id req = sanitized_Request(url) req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') From ff24261ba0359d51dfa54fe4c84a9db157e3b76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 01:24:14 +0700 Subject: [PATCH 08/12] [kaltura] Add explicit port to regexes They should not match e.g. cdnapi.kaltura.computernetworks.com/... 
--- youtube_dl/extractor/kaltura.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index c6483bcf0..54374ea76 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -23,7 +23,7 @@ class KalturaIE(InfoExtractor): (?: kaltura:(?P\d+):(?P[0-9a-z_]+)| https?:// - (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/ + (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: (?: # flash player @@ -98,6 +98,10 @@ class KalturaIE(InfoExtractor): { 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'only_matching': True, + }, + { + 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', + 'only_matching': True, } ] @@ -116,7 +120,7 @@ class KalturaIE(InfoExtractor): re.search( r'''(?xs) (?P["\']) - (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?:(?!(?P=q1)).)*/(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* (?P=q1).*? 
(?: entry_?[Ii]d| From 61ee556aea69f60c5853f27bc92240d4758d7362 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 01:26:00 +0700 Subject: [PATCH 09/12] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7e2afaacf..237be5130 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Extractors +* [xtube] Fix extraction (#12023) +* [pornhub] Fix extraction (#12007, #12018) +* [facebook] Improve JS data regular expression (#12042) +* [kaltura] Improve embed partner id extraction (#12041) ++ [sprout] Add support for sproutonline.com +* [6play] Improve extraction ++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765) ++ [go] Add support for Adobe Pass authentication (#11468, #10831) +* [6play] Fix extraction (#12011) ++ [nbc] Add support for Adobe Pass authentication (#12006) + + version 2017.02.07 Core From 55d4de2283d94c8846357e9a4fd8c74df9eb2835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 01:27:33 +0700 Subject: [PATCH 10/12] release 2017.02.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 +++- youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 20a726fc4..0c1569fd4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.10*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.07 +[debug] youtube-dl version 2017.02.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 237be5130..d4c8081f7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.02.10 Extractors * [xtube] Fix extraction (#12023) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2d82cc321..76882f3b5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -11,6 +11,7 @@ - **4tube** - **56.com** - **5min** + - **6play** - **8tracks** - **91porn** - **9c9media** @@ -667,6 +668,7 @@ - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** + - **scrippsnetworks:watch** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -676,7 +678,6 @@ - **Shared**: shared.sx - **ShowRoomLive** - **Sina** - - **SixPlay** - **skynewsarabia:article** - **skynewsarabia:video** - **SkySports** @@ -711,6 +712,7 @@ - **SportBoxEmbed** - **SportDeutschland** - **Sportschau** + - **Sprout** - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and 
swissinfo.ch play sites diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a73e9d89c..a8395ce04 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.07' +__version__ = '2017.02.10' From 4d32b6385160e8cf9117839022ed795ce02b107d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 9 Feb 2017 23:07:43 +0100 Subject: [PATCH 11/12] [tvplayer] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvplayer.py | 75 ++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 youtube_dl/extractor/tvplayer.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3445e7d40..5115e1a0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1019,6 +1019,7 @@ from .tvplay import ( TVPlayIE, ViafreeIE, ) +from .tvplayer import TVPlayerIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py new file mode 100644 index 000000000..b6537141a --- /dev/null +++ b/youtube_dl/extractor/tvplayer.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + extract_attributes, + urlencode_postdata, + ExtractorError, +) + + +class TVPlayerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P[^/?#]+)' + _TEST = { + 'url': 'http://tvplayer.com/watch/bbcone', + 'info_dict': { + 'id': '89', + 'ext': 'mp4', + 'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + 
current_channel = extract_attributes(self._search_regex( + r'(]+class="[^"]*current-channel[^"]*"[^>]*>)', + webpage, 'channel element')) + title = current_channel['data-name'] + + resource_id = self._search_regex( + r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id') + platform = self._search_regex( + r'platform\s*=\s*"([^"]+)"', webpage, 'platform') + token = self._search_regex( + r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null') + validate = self._search_regex( + r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null') + + try: + response = self._download_json( + 'http://api.tvplayer.com/api/v2/stream/live', + resource_id, headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + }, data=urlencode_postdata({ + 'service': 1, + 'platform': platform, + 'id': resource_id, + 'token': token, + 'validate': validate, + }))['tvplayer']['response'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + response = self._parse_json( + e.cause.read().decode(), resource_id)['tvplayer']['response'] + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']), expected=True) + raise + + formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': resource_id, + 'display_id': display_id, + 'title': self._live_title(title), + 'formats': formats, + 'is_live': True, + } From e01bfc19c345f189b65f3c6e4064b304f1cd337f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Feb 2017 09:39:24 +0700 Subject: [PATCH 12/12] [extractor/commonmistakes] Restrict _VALID_URL (closes #12050) --- youtube_dl/extractor/commonmistakes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py index 2f86e2381..d3ed4a9a4 100644 --- a/youtube_dl/extractor/commonmistakes.py +++ b/youtube_dl/extractor/commonmistakes.py @@ -7,7 +7,7 @@ from ..utils 
import ExtractorError class CommonMistakesIE(InfoExtractor): IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:url|URL) + (?:url|URL)$ ''' _TESTS = [{