From 7409af9eb322b0e76a4ae2d713020c45d178fabe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Jun 2017 08:56:53 +0700 Subject: [PATCH 01/18] [msn] Fix formats extraction --- youtube_dl/extractor/msn.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index 1473bcf48..650731fdc 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -68,10 +68,6 @@ class MSNIE(InfoExtractor): format_url = file_.get('url') if not format_url: continue - ext = determine_ext(format_url) - if ext == 'ism': - formats.extend(self._extract_ism_formats( - format_url + '/Manifest', display_id, 'mss', fatal=False)) if 'm3u8' in format_url: # m3u8_native should not be used here until # https://github.com/rg3/youtube-dl/issues/9913 is fixed @@ -79,6 +75,9 @@ class MSNIE(InfoExtractor): format_url, display_id, 'mp4', m3u8_id='hls', fatal=False) formats.extend(m3u8_formats) + elif determine_ext(format_url) == 'ism': + formats.extend(self._extract_ism_formats( + format_url + '/Manifest', display_id, 'mss', fatal=False)) else: formats.append({ 'url': format_url, From 6e999fbc120dfdbd8ec5ca056105420fd7cbffe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Jun 2017 19:44:44 +0700 Subject: [PATCH 02/18] [newgrounds] Improve formats and uploader extraction (closes #13346) --- youtube_dl/extractor/newgrounds.py | 48 ++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 9bea610c8..ae4fe95ef 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class NewgroundsIE(InfoExtractor): @@ -23,24 +24,61 @@ class NewgroundsIE(InfoExtractor): 'title': 'Dancin', 'uploader': 
'Squirrelman82', }, + }, { + # source format unavailable, additional mp4 formats + 'url': 'http://www.newgrounds.com/portal/view/689400', + 'info_dict': { + 'id': '689400', + 'ext': 'mp4', + 'title': 'ZTV News Episode 8', + 'uploader': 'BennettTheSage', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'([^>]+)', webpage, 'title') - uploader = self._html_search_regex( - r'Author\s*]+>([^<]+)', webpage, 'uploader', fatal=False) + video_url = self._parse_json(self._search_regex( + r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id) - music_url = self._parse_json(self._search_regex( - r'"url":("[^"]+"),', webpage, ''), media_id) + formats = [{ + 'url': video_url, + 'format_id': 'source', + 'quality': 1, + }] + + max_resolution = int_or_none(self._search_regex( + r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', + default=None)) + if max_resolution: + url_base = video_url.rpartition('.')[0] + for resolution in (360, 720, 1080): + if resolution > max_resolution: + break + formats.append({ + 'url': '%s.%dp.mp4' % (url_base, resolution), + 'format_id': '%dp' % resolution, + 'height': resolution, + }) + + self._check_formats(formats, media_id) + self._sort_formats(formats) + + uploader = self._html_search_regex( + r'(?:Author|Writer)\s*]+>([^<]+)', webpage, 'uploader', + fatal=False) return { 'id': media_id, 'title': title, - 'url': music_url, 'uploader': uploader, + 'formats': formats, } From 70e7967202efa0acb6202ceedf87049ba72b94e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Jun 2017 20:50:33 +0700 Subject: [PATCH 03/18] [newgrounds:playlist] Add extractor (closes #10611) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/newgrounds.py | 56 +++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e1907314d..7e45232dd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -636,7 +636,10 @@ from .neteasemusic import ( NetEaseMusicProgramIE, NetEaseMusicDjRadioIE, ) -from .newgrounds import NewgroundsIE +from .newgrounds import ( + NewgroundsIE, + NewgroundsPlaylistIE, +) from .newstube import NewstubeIE from .nextmedia import ( NextMediaIE, diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index ae4fe95ef..dc183caec 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -1,7 +1,12 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + extract_attributes, + int_or_none, +) class NewgroundsIE(InfoExtractor): @@ -82,3 +87,52 @@ class NewgroundsIE(InfoExtractor): 'uploader': uploader, 'formats': formats, } + + +class NewgroundsPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.newgrounds.com/collection/cats', + 'info_dict': { + 'id': 'cats', + 'title': 'Cats', + }, + 'playlist_mincount': 46, + }, { + 'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA', + 'info_dict': { + 'id': 'ZONE-SAMA', + 'title': 'Portal Search: ZONE-SAMA', + }, + 'playlist_mincount': 47, + }, { + 'url': 'http://www.newgrounds.com/audio/search/title/cats', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title = self._search_regex( + r'([^>]+)', webpage, 'title', default=None) + + # cut left menu + webpage = self._search_regex( + r'(?s)]+\bclass=["\']column wide(.+)', + webpage, 'wide column', default=webpage) + + entries = [] + for a, path, media_id in re.findall( + 
r'(]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)', + webpage): + a_class = extract_attributes(a).get('class') + if a_class not in ('item-portalsubmission', 'item-audiosubmission'): + continue + entries.append( + self.url_result( + 'https://www.newgrounds.com/%s' % path, + ie=NewgroundsIE.ie_key(), video_id=media_id)) + + return self.playlist_result(entries, playlist_id, title) From 2ae2ffda5eae9c64d40d2fec839ba5deb07717f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Jun 2017 21:27:22 +0700 Subject: [PATCH 04/18] [utils] Improve unified_timestamp --- test/test_utils.py | 1 + youtube_dl/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index d7e05817c..41b094d89 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -340,6 +340,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) + self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d79410e58..16bf49408 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1179,7 +1179,7 @@ def unified_timestamp(date_str, day_first=True): if date_str is None: return None - date_str = date_str.replace(',', ' ') + date_str = re.sub(r'[,|]', '', date_str) pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 timezone, date_str = extract_timezone(date_str) From 28a4d6cce89d159f9fe701e6cf716c7e3ffa4415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Jun 2017 21:27:32 +0700 Subject: [PATCH 05/18] [newgrounds] Extract more metadata (closes #13232) --- youtube_dl/extractor/newgrounds.py | 38 
++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index dc183caec..0e26f8399 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -6,6 +6,9 @@ from .common import InfoExtractor from ..utils import ( extract_attributes, int_or_none, + parse_duration, + parse_filesize, + unified_timestamp, ) @@ -19,7 +22,10 @@ class NewgroundsIE(InfoExtractor): 'ext': 'mp3', 'title': 'B7 - BusMode', 'uploader': 'Burn7', - } + 'timestamp': 1378878540, + 'upload_date': '20130911', + 'duration': 143, + }, }, { 'url': 'https://www.newgrounds.com/portal/view/673111', 'md5': '3394735822aab2478c31b1004fe5e5bc', @@ -28,6 +34,8 @@ class NewgroundsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Dancin', 'uploader': 'Squirrelman82', + 'timestamp': 1460256780, + 'upload_date': '20160410', }, }, { # source format unavailable, additional mp4 formats @@ -37,6 +45,8 @@ class NewgroundsIE(InfoExtractor): 'ext': 'mp4', 'title': 'ZTV News Episode 8', 'uploader': 'BennettTheSage', + 'timestamp': 1487965140, + 'upload_date': '20170224', }, 'params': { 'skip_download': True, @@ -51,11 +61,11 @@ class NewgroundsIE(InfoExtractor): title = self._html_search_regex( r'([^>]+)', webpage, 'title') - video_url = self._parse_json(self._search_regex( + media_url = self._parse_json(self._search_regex( r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id) formats = [{ - 'url': video_url, + 'url': media_url, 'format_id': 'source', 'quality': 1, }] @@ -64,7 +74,7 @@ class NewgroundsIE(InfoExtractor): r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', default=None)) if max_resolution: - url_base = video_url.rpartition('.')[0] + url_base = media_url.rpartition('.')[0] for resolution in (360, 720, 1080): if resolution > max_resolution: break @@ -77,14 +87,32 @@ class NewgroundsIE(InfoExtractor): self._check_formats(formats, media_id) self._sort_formats(formats) - 
uploader = self._html_search_regex( + uploader = self._search_regex( r'(?:Author|Writer)\s*]+>([^<]+)', webpage, 'uploader', fatal=False) + timestamp = unified_timestamp(self._search_regex( + r'
Uploaded
\s*
([^<]+)', webpage, 'timestamp', + default=None)) + duration = parse_duration(self._search_regex( + r'
Song\s*
.+?
([^<]+)', webpage, 'duration', + default=None)) + + filesize_approx = parse_filesize(self._html_search_regex( + r'
Song\s*
(.+?)
', webpage, 'filesize', + default=None)) + if len(formats) == 1: + formats[0]['filesize_approx'] = filesize_approx + + if '
Song' in webpage: + formats[0]['vcodec'] = 'none' + return { 'id': media_id, 'title': title, 'uploader': uploader, + 'timestamp': timestamp, + 'duration': duration, 'formats': formats, } From 624bd0104c3c0f367ecb129c438da57398668801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Jun 2017 21:36:19 +0700 Subject: [PATCH 06/18] [rutv] Add support for testplayer.vgtrk.com (closes #13347) --- youtube_dl/extractor/rutv.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a5e672c0a..d2713c19a 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -13,11 +13,15 @@ from ..utils import ( class RUTVIE(InfoExtractor): IE_DESC = 'RUTV.RU' _VALID_URL = r'''(?x) - https?://player\.(?:rutv\.ru|vgtrk\.com)/ - (?Pflash\d+v/container\.swf\?id= - |iframe/(?Pswf|video|live)/id/ - |index/iframe/cast_id/) - (?P\d+)''' + https?:// + (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/ + (?P + flash\d+v/container\.swf\?id=| + iframe/(?Pswf|video|live)/id/| + index/iframe/cast_id/ + ) + (?P\d+) + ''' _TESTS = [ { @@ -99,17 +103,21 @@ class RUTVIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', + 'only_matching': True, + }, ] @classmethod def _extract_url(cls, webpage): mobj = re.search( - r']+?src=(["\'])(?Phttps?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) + r']+?src=(["\'])(?Phttps?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) if mobj: return mobj.group('url') mobj = re.search( - r']+?property=(["\'])og:video\1[^>]+?content=(["\'])(?Phttps?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', + 
r']+?property=(["\'])og:video\1[^>]+?content=(["\'])(?Phttps?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', webpage) if mobj: return mobj.group('url') From 16bc958287b25566dbe22d22274d4c992278d2f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 00:14:04 +0700 Subject: [PATCH 07/18] [xfileshare] Modernize and pass referrer --- youtube_dl/extractor/xfileshare.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 13f8be6cb..bfec17e3c 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -130,12 +130,12 @@ class XFileShareIE(InfoExtractor): if countdown: self._sleep(countdown, video_id) - post = urlencode_postdata(fields) - - req = sanitized_Request(url, post) - req.add_header('Content-type', 'application/x-www-form-urlencoded') - - webpage = self._download_webpage(req, video_id, 'Downloading video page') + webpage = self._download_webpage( + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), headers={ + 'Referer': url, + 'Content-type': 'application/x-www-form-urlencoded', + }) title = (self._search_regex( (r'style="z-index: [0-9]+;">([^<]+)', From 534863e057b155cd71035f05600444c2be62ca95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 00:16:47 +0700 Subject: [PATCH 08/18] [xfileshare] Add support for rapidvideo (closes #13348) --- youtube_dl/extractor/xfileshare.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index bfec17e3c..32e16bc73 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -30,6 +30,7 @@ class XFileShareIE(InfoExtractor): (r'vidabc\.com', 'Vid ABC'), (r'vidbom\.com', 'VidBom'), (r'vidlo\.us', 'vidlo'), + (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'), ) 
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) @@ -109,6 +110,9 @@ class XFileShareIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.rapidvideo.cool/b667kprndr8w', + 'only_matching': True, }] def _real_extract(self, url): From 72b409559c984bb116015ada55acd38120b24cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 01:50:32 +0700 Subject: [PATCH 09/18] [compat] Introduce compat_HTMLParseError --- youtube_dl/compat.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 39527117f..bbc499eda 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2322,6 +2322,19 @@ try: except ImportError: # Python 2 from HTMLParser import HTMLParser as compat_HTMLParser +try: # Python 2 + from HTMLParser import HTMLParseError as compat_HTMLParseError +except ImportError: # Python <3.4 + try: + from html.parser import HTMLParseError as compat_HTMLParseError + except ImportError: # Python >3.4 + + # HTMLParseError has been deprecated in Python 3.3 and removed in + # Python 3.5. 
Introducing dummy exception for Python >3.5 for compatible + # and uniform cross-version exception handling + class compat_HTMLParseError(Exception): + pass + try: from subprocess import DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL From b4a3d461e4a00dfc60047b667aa3136c8b03eda8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 01:52:24 +0700 Subject: [PATCH 10/18] [utils] Handle HTMLParseError in extract_attributes (closes #13349) --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 41b094d89..2b93b3604 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase): supports_outside_bmp = False if supports_outside_bmp: self.assertEqual(extract_attributes(''), {'x': 'Smile \U0001f600!'}) + # Malformed HTML should not break attributes extraction on older Python + self.assertEqual(extract_attributes(''), {}) def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 16bf49408..1973bd483 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -36,6 +36,7 @@ import xml.etree.ElementTree import zlib from .compat import ( + compat_HTMLParseError, compat_HTMLParser, compat_basestring, compat_chr, @@ -409,8 +410,12 @@ def extract_attributes(html_element): but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
""" parser = HTMLAttributeParser() - parser.feed(html_element) - parser.close() + try: + parser.feed(html_element) + parser.close() + # Older Python may throw HTMLParseError in case of malformed HTML + except compat_HTMLParseError: + pass return parser.attrs From bf87c36c93aee81a1bf26c8213262724955d52a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 02:01:12 +0700 Subject: [PATCH 11/18] [xfileshare] PEP 8 --- youtube_dl/extractor/xfileshare.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 32e16bc73..a32789ef4 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -10,7 +10,6 @@ from ..utils import ( ExtractorError, int_or_none, NO_DEFAULT, - sanitized_Request, urlencode_postdata, ) From 9932ac5c58080e52b3111f5f8dec15c65c4079ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 02:01:15 +0700 Subject: [PATCH 12/18] [ChangeLog] Actualize --- ChangeLog | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ChangeLog b/ChangeLog index c605c5bd6..19f160c81 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,36 @@ +version + +Core +* [utils] Handle compat_HTMLParseError in extract_attributes (#13349) ++ [compat] Introduce compat_HTMLParseError +* [utils] Improve unified_timestamp +* [extractor/generic] Ensure format id is unicode string +* [extractor/common] Return unicode string from _match_id ++ [YoutubeDL] Sanitize more fields (#13313) + +Extractors ++ [xfileshare] Add support for rapidvideo.tv (#13348) +* [xfileshare] Modernize and pass Referer ++ [rutv] Add support for testplayer.vgtrk.com (#13347) ++ [newgrounds] Extract more metadata (#13232) ++ [newgrounds:playlist] Add support for playlists (#10611) +* [newgrounds] Improve formats and uploader extraction (#13346) +* [msn] Fix formats extraction +* [turbo] Ensure format id is string +* [sexu] 
Ensure height is int +* [jove] Ensure comment count is int +* [golem] Ensure format id is string +* [gfycat] Ensure filesize is int +* [foxgay] Ensure height is int +* [flickr] Ensure format id is string +* [sohu] Fix numeric fields +* [safari] Improve authentication detection (#13319) +* [liveleak] Ensure height is int (#13313) +* [streamango] Make title optional (#13292) +* [rtlnl] Improve URL regular expression (#13295) +* [tvplayer] Fix extraction (#13291) + + version 2017.06.05 Core From cb1e6d898532e86b0752117337b7519d22c63274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 02:23:17 +0700 Subject: [PATCH 13/18] release 2017.06.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 28 ++++++++++++++-------------- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ce1e46085..b8806e7b3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.06.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.06.05 +[debug] youtube-dl version 2017.06.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 19f160c81..e3b7f7d60 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.06.12 Core * [utils] Handle compat_HTMLParseError in extract_attributes (#13349) diff --git a/README.md b/README.md index 2189552a9..fe2bebc2a 100644 --- a/README.md +++ b/README.md @@ -145,18 +145,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --max-views COUNT Do not download any videos with more than COUNT views --match-filter FILTER Generic video filter. Specify any key (see - help for -o for a list of available keys) - to match if the key is present, !key to - check if the key is not present, key > - NUMBER (like "comment_count > 12", also - works with >=, <, <=, !=, =) to compare - against a number, key = 'LITERAL' (like - "uploader = 'Mike Smith'", also works with - !=) to match against a string literal and & - to require multiple matches. Values which - are not known are excluded unless you put a - question mark (?) after the operator. 
For - example, to only match videos that have + the "OUTPUT TEMPLATE" for a list of + available keys) to match if the key is + present, !key to check if the key is not + present, key > NUMBER (like "comment_count + > 12", also works with >=, <, <=, !=, =) to + compare against a number, key = 'LITERAL' + (like "uploader = 'Mike Smith'", also works + with !=) to match against a string literal + and & to require multiple matches. Values + which are not known are excluded unless you + put a question mark (?) after the operator. + For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given @@ -277,8 +277,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --get-filename Simulate, quiet but print output filename --get-format Simulate, quiet but print output format -j, --dump-json Simulate, quiet but print JSON information. - See --output for a description of available - keys. + See the "OUTPUT TEMPLATE" for a description + of available keys. -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. 
If the URL refers to a playlist, dump the whole diff --git a/docs/supportedsites.md b/docs/supportedsites.md index fe022d158..7189f31d9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -512,6 +512,7 @@ - **netease:song**: 网易云音乐 - **Netzkino** - **Newgrounds** + - **NewgroundsPlaylist** - **Newstube** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 @@ -974,7 +975,7 @@ - **WSJArticle** - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d3aa39977..a7386c3a8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.06.05' +__version__ = '2017.06.12' From b081f53b08e791d9e688bfa1a17caf3bb3630693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Jun 2017 02:35:57 +0700 Subject: [PATCH 14/18] [compat] Add compat_HTMLParseError to __all__ --- youtube_dl/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index bbc499eda..7ef327451 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2895,6 +2895,7 @@ else: __all__ = [ + 'compat_HTMLParseError', 'compat_HTMLParser', 'compat_HTTPError', 'compat_basestring', From 97fa1f8dc4df553624b76a3da2b00edc47744a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Jun 2017 23:15:06 +0700 Subject: [PATCH 15/18] [corus] Add support for history.ca (closes #13359) --- youtube_dl/extractor/corus.py | 18 +++++++++++++++++- 1 file changed, 17 
insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py index 7b2f5008b..9cdd34636 100644 --- a/youtube_dl/extractor/corus.py +++ b/youtube_dl/extractor/corus.py @@ -8,7 +8,16 @@ from ..utils import int_or_none class CorusIE(ThePlatformFeedIE): - _VALID_URL = r'https?://(?:www\.)?(?P(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?P + (?:globaltv|etcanada)\.com| + (?:hgtv|foodnetwork|slice|history)\.ca + ) + /(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=)) + (?P\d+) + ''' _TESTS = [{ 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/', 'md5': '05dcbca777bf1e58c2acbb57168ad3a6', @@ -27,6 +36,9 @@ class CorusIE(ThePlatformFeedIE): }, { 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/', 'only_matching': True, + }, { + 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video', + 'only_matching': True, }] _TP_FEEDS = { @@ -50,6 +62,10 @@ class CorusIE(ThePlatformFeedIE): 'feed_id': '5tUJLgV2YNJ5', 'account_id': 2414427935, }, + 'history': { + 'feed_id': 'tQFx_TyyEq4J', + 'account_id': 2369613659, + }, } def _real_extract(self, url): From b5dc33daa96fd59ed5e0dc38cb5bb5157433a72b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Jun 2017 23:27:27 +0700 Subject: [PATCH 16/18] [corus] Add support for showcase.ca --- youtube_dl/extractor/corus.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py index 9cdd34636..807a29eea 100644 --- a/youtube_dl/extractor/corus.py +++ b/youtube_dl/extractor/corus.py @@ -13,7 +13,7 @@ class CorusIE(ThePlatformFeedIE): (?:www\.)? 
(?P (?:globaltv|etcanada)\.com| - (?:hgtv|foodnetwork|slice|history)\.ca + (?:hgtv|foodnetwork|slice|history|showcase)\.ca ) /(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=)) (?P\d+) @@ -39,6 +39,9 @@ class CorusIE(ThePlatformFeedIE): }, { 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video', 'only_matching': True, + }, { + 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video', + 'only_matching': True, }] _TP_FEEDS = { @@ -66,6 +69,10 @@ class CorusIE(ThePlatformFeedIE): 'feed_id': 'tQFx_TyyEq4J', 'account_id': 2369613659, }, + 'showcase': { + 'feed_id': '9H6qyshBZU3E', + 'account_id': 2414426607, + }, } def _real_extract(self, url): From 7dd5415cd0e824b00e6abf9a18d55701d52babec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Jun 2017 21:33:40 +0700 Subject: [PATCH 17/18] [npo] Improve _VALID_URL (closes #13376) --- youtube_dl/extractor/npo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 79296f0ef..5f8b6def1 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -35,7 +35,7 @@ class NPOIE(NPOBaseIE): https?:// (?:www\.)? 
(?: - npo\.nl/(?!live|radio)(?:[^/]+/){2}| + npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| zapp\.nl/[^/]+/[^/]+/ @@ -150,6 +150,9 @@ class NPOIE(NPOBaseIE): # live stream 'url': 'npo:LI_NL1_4188102', 'only_matching': True, + }, { + 'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373', + 'only_matching': True, }] def _real_extract(self, url): From 0a268c6e11e9fb55f41b474497997bff61d97cd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Jun 2017 22:02:15 +0700 Subject: [PATCH 18/18] [extractor/common] Improve jwplayer formats extraction (closes #13379) --- youtube_dl/extractor/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f027447c8..941385ae2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2328,6 +2328,8 @@ class InfoExtractor(object): urls = [] formats = [] for source in jwplayer_sources_data: + if not isinstance(source, dict): + continue source_url = self._proto_relative_url(source.get('file')) if not source_url: continue