From ee093a0ea04d973cc6dbd0d53b57c976a9e68dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:11:02 +0700 Subject: [PATCH 01/15] [anvato] Add ability to bypass geo restriction --- youtube_dl/extractor/anvato.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index e443ecff6..7a29cd2c6 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, strip_jsonp, unescapeHTML, + unsmuggle_url, ) @@ -275,6 +276,9 @@ class AnvatoIE(InfoExtractor): anvplayer_data['accessKey'], anvplayer_data['video']) def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + mobj = re.match(self._VALID_URL, url) access_key, video_id = mobj.group('access_key_or_mcp', 'id') if access_key not in self._ANVACK_TABLE: From 4827270526621cdabe74275deb38b04c2ef1b0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:11:35 +0700 Subject: [PATCH 02/15] [scrippsnetworks:watch] Bypass geo restriction --- youtube_dl/extractor/scrippsnetworks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 411fef84c..b446a02ba 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -10,6 +10,7 @@ import re from .common import InfoExtractor from .anvato import AnvatoIE from ..utils import ( + smuggle_url, urlencode_postdata, xpath_text, ) @@ -183,5 +184,7 @@ x-api-key:%(key)s })['results'][0]['mcpId'] return self.url_result( - 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + smuggle_url( + 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + {'geo_countries': ['US']}), AnvatoIE.ie_key(), video_id=mcp_id) From 5efaf43c93247446c8616454bb3f59b7ae13fcb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:13:07 +0700 Subject: [PATCH 03/15] [downloader/fragment] Output ad fragment count --- youtube_dl/downloader/fragment.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 6f6fb4a77..7e891b92a 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -117,9 +117,15 @@ class FragmentFD(FileDownloader): def _prepare_frag_download(self, ctx): if 'live' not in ctx: ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' self.to_screen( - '[%s] Total fragments: %s' - % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)')) + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) self.report_destination(ctx['filename']) dl = HttpQuietDownloader( self.ydl, From 74c42d9ec3c6e0f0f67923fd151ce2531637a3db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:13:48 +0700 Subject: [PATCH 04/15] [downloader/hls] Ignore anvato ad fragments (closes #14496) --- youtube_dl/downloader/hls.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 46308cf07..7955ca510 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -75,15 +75,29 @@ class HlsFD(FragmentFD): fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) - total_frags = 0 + def anvato_ad(s): + return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s + + media_frags = 0 + ad_frags = 0 + ad_frag_next = False for line in s.splitlines(): line = line.strip() - if line and not line.startswith('#'): - total_frags += 1 + if not line: + continue + if line.startswith('#'): + if anvato_ad(line): + ad_frags += 1 + continue + if ad_frag_next: + ad_frag_next = False + continue + media_frags += 1 ctx = { 'filename': filename, - 'total_frags': total_frags, + 'total_frags': media_frags, + 'ad_frags': ad_frags, } self._prepare_and_start_frag_download(ctx) @@ -101,10 +115,14 @@ class HlsFD(FragmentFD): decrypt_info = {'METHOD': 'NONE'} byte_range = {} frag_index = 0 + ad_frag_next = False for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): + if ad_frag_next: + ad_frag_next = False + continue frag_index += 1 if frag_index <= ctx['fragment_index']: continue @@ -175,6 +193,8 @@ class HlsFD(FragmentFD): 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } + elif anvato_ad(line): + ad_frag_next = True self._finish_frag_download(ctx) From bd7e1406b31ed18c48d51486262465c88bd92866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:15:37 +0700 Subject: [PATCH 05/15] [ChangeLog] Actualize --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3b3ff459b..4eb41a7a6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version + +Core +* [downloader/hls] Ignore anvato ad fragments (#14496) +* [downloader/fragment] Output ad fragment count + +Extractors +* [scrippsnetworks:watch] Bypass geo restriction ++ [anvato] Add ability to bypass geo restriction +* [redditr] Fix extraction for URLs with query (#14495) + + version 2017.10.15 Core From 7e721e35daa954f2c12f1113950ae07f1302f49e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 06:16:41 +0700 Subject: [PATCH 06/15] release 2017.10.15.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2feb22dd0..1a72ab291 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.15 +[debug] youtube-dl version 2017.10.15.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4eb41a7a6..d728e4d03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.10.15.1 Core * [downloader/hls] Ignore anvato ad fragments (#14496) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 49d475e85..d01ba3095 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.10.15' +__version__ = '2017.10.15.1' From aaab8c5e711c773885f67fd53788c89d94d48031 Mon Sep 17 00:00:00 2001 From: Pawit Pornkitprasan Date: Sun, 15 Oct 2017 10:40:57 +0700 Subject: [PATCH 07/15] [niconico] Improve uploader metadata extraction robustness (closes #14135) --- youtube_dl/extractor/niconico.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 026329d3e..df7f528be 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor): 'uploader': 'takuya0301', 'uploader_id': '2698420', 'upload_date': '20131123', - 'timestamp': 1385182762, + 'timestamp': int, # timestamp is unstable 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, 'view_count': int, @@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor): 'skip': 'Requires an account', }, { # "New" HTML5 video + # md5 is unstable 'url': 'http://www.nicovideo.jp/watch/sm31464864', - 'md5': '351647b4917660986dc0fa8864085135', 'info_dict': { 'id': 'sm31464864', 'ext': 'mp4', @@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor): 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb', 'timestamp': 1498514060, 'upload_date': '20170626', - 'uploader': 'ゲス', + 'uploader': 'ゲスト', 'uploader_id': '40826363', 'thumbnail': r're:https?://.*', 'duration': 198, @@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, }, 'skip': 'Requires an account', + }, { + # Video without owner + 'url': 'http://www.nicovideo.jp/watch/sm18238488', + 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e', + 'info_dict': { + 'id': 'sm18238488', + 'ext': 'mp4', + 'title': '【実写版】ミュータントタートルズ', + 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e', + 'timestamp': 1341160408, + 'upload_date': '20120701', + 'uploader': None, + 'uploader_id': None, + 'thumbnail': r're:https?://.*', + 'duration': 5271, + 'view_count': int, + 'comment_count': int, + }, + 'skip': 'Requires an account', }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, @@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor): webpage_url = get_video_info('watch_url') or url - owner = api_data.get('owner', {}) + # Note: cannot use api_data.get('owner', {}) because owner may be set to "null" + # in the JSON, which will cause None to be returned instead of {}. + owner = try_get(api_data, lambda x: x.get('owner'), dict) or {} uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id') uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname') From a9ee4f6e49a5910b432d0ba4d8dc60231aa7deba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 11:03:54 +0700 Subject: [PATCH 08/15] [downloader/hls] Fix total fragments count when ad fragments exist --- youtube_dl/downloader/hls.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7955ca510..1a6e226c8 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -88,6 +88,7 @@ class HlsFD(FragmentFD): if line.startswith('#'): if anvato_ad(line): ad_frags += 1 + ad_frag_next = True continue if ad_frag_next: ad_frag_next = False From 8cc1840ccb2f17837fc7f12ceea91826eb925ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Oct 2017 22:12:34 +0700 Subject: [PATCH 09/15] [arte] Capture and output error message --- youtube_dl/extractor/arte.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 5cde90c5b..ffc321821 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -15,6 +16,7 @@ from ..utils import ( int_or_none, NO_DEFAULT, qualities, + try_get, unified_strdate, ) @@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] - vsr = player_info['VSR'] - + vsr = try_get(player_info, lambda x: x['VSR'], dict) if not vsr: - raise ExtractorError( - 'Video %s is not available' % player_info.get('VID') or video_id, - expected=True) + error = None + if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error': + error = try_get( + player_info, lambda x: x['custom_msg']['msg'], compat_str) + if not error: + error = 'Video %s is not available' % player_info.get('VID') or video_id + raise ExtractorError(error, expected=True) upload_date_str = player_info.get('shootingDate') if not upload_date_str: From acc4ea6237f7a2c7d923eca5b004aefbb6a6a766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Oct 2017 05:11:25 +0700 Subject: [PATCH 10/15] [eporner] Add support for embed URLs (closes #14507) --- youtube_dl/extractor/eporner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index f3734e9f8..81f2e2ee1 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -15,7 +15,7 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\w+)(?:/(?P[\w-]+))?' + _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P\w+)(?:/(?P[\w-]+))?' _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', @@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor): }, { 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', 'only_matching': True, + }, { + 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', + 'only_matching': True, }] def _real_extract(self, url): From 83fcf19e2d517a2862fd89dffb1d0abad024a5b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Oct 2017 05:48:45 +0700 Subject: [PATCH 11/15] [drtv] Respect preference for direct http formats (#14509) --- youtube_dl/extractor/drtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 69effba58..f757745ba 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor): 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), 'vcodec': 'none' if kind == 'AudioResource' else None, + 'preference': preference, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): From c233003afe34ba9db7861bf44d5097ade870c739 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 17 Oct 2017 15:39:06 +0800 Subject: [PATCH 12/15] [megaphone] Fix deprecated escape sequence --- youtube_dl/extractor/megaphone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dl/extractor/megaphone.py index 60e3caf0d..5bafa6cf4 100644 --- a/youtube_dl/extractor/megaphone.py +++ b/youtube_dl/extractor/megaphone.py @@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor): 'id': 'GLT9749789991', 'ext': 'mp3', 'title': '#97 What Kind Of Idiot Gets Phished?', - 'thumbnail': 're:^https://.*\.png.*$', + 'thumbnail': r're:^https://.*\.png.*$', 'duration': 1776.26375, 'author': 'Reply All', }, From 6b9cbd023f1206f90e60cbed4497e6b107438542 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 17 Oct 2017 09:22:43 +0000 Subject: [PATCH 13/15] [pbs] restrict direct video url regex(fixes #14519) --- youtube_dl/extractor/pbs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8889e4a1a..b51dcbe10 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -187,7 +187,7 @@ class PBSIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?: # Direct video URL - (?:%s)/(?:viralplayer|video)/(?P[0-9]+)/? | + (?:%s)/(?:(?:vir|port)alplayer|video)/(?P[0-9]+)(?:[?/]|$) | # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player @@ -367,6 +367,10 @@ class PBSIE(InfoExtractor): { 'url': 'http://watch.knpb.org/video/2365616055/', 'only_matching': True, + }, + { + 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=', + 'only_matching': True, } ] _ERRORS = { From fa4bc6e71261613cf530437a2407ff7b61ea6cb5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 17 Oct 2017 10:07:37 +0000 Subject: [PATCH 14/15] [youtube] replace youtube redirect urls in description(fixes #14517) --- youtube_dl/extractor/youtube.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4e8db240d..5aef555fb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: + + def replace_url(m): + redir_url = compat_urlparse.urljoin(url, m.group(1)) + parsed_redir_url = compat_urllib_parse_urlparse(redir_url) + if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect': + qs = compat_parse_qs(parsed_redir_url.query) + q = qs.get('q') + if q and q[0]: + return q[0] + return redir_url + description_original = video_description = re.sub(r'''(?x) ]*> [^<]+\.{3}\s* - ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description) + ''', replace_url, video_description) video_description = clean_html(video_description) else: fd_mobj = re.search(r' Date: Tue, 17 Oct 2017 22:53:34 +0700 Subject: [PATCH 15/15] [downloader/fragment] Report warning instead of error on inconsistent download state --- youtube_dl/downloader/fragment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 7e891b92a..93002e45a 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -158,7 +158,7 @@ class FragmentFD(FileDownloader): if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): self._read_ytdl_file(ctx) if ctx['fragment_index'] > 0 and resume_len == 0: - self.report_error( + self.report_warning( 'Inconsistent state of incomplete fragment download. ' 'Restarting from the beginning...') ctx['fragment_index'] = resume_len = 0