diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2feb22dd0..1a72ab291 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.10.15 +[debug] youtube-dl version 2017.10.15.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 3b3ff459b..d728e4d03 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version 2017.10.15.1 + +Core +* [downloader/hls] Ignore anvato ad fragments (#14496) +* [downloader/fragment] Output ad 
fragment count + +Extractors +* [scrippsnetworks:watch] Bypass geo restriction ++ [anvato] Add ability to bypass geo restriction +* [redditr] Fix extraction for URLs with query (#14495) + + version 2017.10.15 Core diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 6f6fb4a77..93002e45a 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -117,9 +117,15 @@ class FragmentFD(FileDownloader): def _prepare_frag_download(self, ctx): if 'live' not in ctx: ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' self.to_screen( - '[%s] Total fragments: %s' - % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)')) + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) self.report_destination(ctx['filename']) dl = HttpQuietDownloader( self.ydl, @@ -152,7 +158,7 @@ class FragmentFD(FileDownloader): if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): self._read_ytdl_file(ctx) if ctx['fragment_index'] > 0 and resume_len == 0: - self.report_error( + self.report_warning( 'Inconsistent state of incomplete fragment download. 
' 'Restarting from the beginning...') ctx['fragment_index'] = resume_len = 0 diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 46308cf07..1a6e226c8 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -75,15 +75,30 @@ class HlsFD(FragmentFD): fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) - total_frags = 0 + def anvato_ad(s): + return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s + + media_frags = 0 + ad_frags = 0 + ad_frag_next = False for line in s.splitlines(): line = line.strip() - if line and not line.startswith('#'): - total_frags += 1 + if not line: + continue + if line.startswith('#'): + if anvato_ad(line): + ad_frags += 1 + ad_frag_next = True + continue + if ad_frag_next: + ad_frag_next = False + continue + media_frags += 1 ctx = { 'filename': filename, - 'total_frags': total_frags, + 'total_frags': media_frags, + 'ad_frags': ad_frags, } self._prepare_and_start_frag_download(ctx) @@ -101,10 +116,14 @@ class HlsFD(FragmentFD): decrypt_info = {'METHOD': 'NONE'} byte_range = {} frag_index = 0 + ad_frag_next = False for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): + if ad_frag_next: + ad_frag_next = False + continue frag_index += 1 if frag_index <= ctx['fragment_index']: continue @@ -175,6 +194,8 @@ class HlsFD(FragmentFD): 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } + elif anvato_ad(line): + ad_frag_next = True self._finish_frag_download(ctx) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index e443ecff6..7a29cd2c6 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, strip_jsonp, unescapeHTML, + unsmuggle_url, ) @@ -275,6 +276,9 @@ class AnvatoIE(InfoExtractor): anvplayer_data['accessKey'], anvplayer_data['video']) def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, 
{}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + mobj = re.match(self._VALID_URL, url) access_key, video_id = mobj.group('access_key_or_mcp', 'id') if access_key not in self._ANVACK_TABLE: diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 5cde90c5b..ffc321821 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -15,6 +16,7 @@ from ..utils import ( int_or_none, NO_DEFAULT, qualities, + try_get, unified_strdate, ) @@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] - vsr = player_info['VSR'] - + vsr = try_get(player_info, lambda x: x['VSR'], dict) if not vsr: - raise ExtractorError( - 'Video %s is not available' % player_info.get('VID') or video_id, - expected=True) + error = None + if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error': + error = try_get( + player_info, lambda x: x['custom_msg']['msg'], compat_str) + if not error: + error = 'Video %s is not available' % (player_info.get('VID') or video_id) + raise ExtractorError(error, expected=True) upload_date_str = player_info.get('shootingDate') if not upload_date_str: diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 69effba58..f757745ba 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor): 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), 'vcodec': 'none' if kind == 'AudioResource' else None, + 'preference': preference, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index f3734e9f8..81f2e2ee1 100644 --- a/youtube_dl/extractor/eporner.py +++ 
b/youtube_dl/extractor/eporner.py @@ -15,7 +15,7 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' + _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', @@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor): }, { 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', 'only_matching': True, + }, { + 'url': 'http://www.eporner.com/embed/3YRUtzMcWn0', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dl/extractor/megaphone.py index 60e3caf0d..5bafa6cf4 100644 --- a/youtube_dl/extractor/megaphone.py +++ b/youtube_dl/extractor/megaphone.py @@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor): 'id': 'GLT9749789991', 'ext': 'mp3', 'title': '#97 What Kind Of Idiot Gets Phished?', - 'thumbnail': 're:^https://.*\.png.*$', + 'thumbnail': r're:^https://.*\.png.*$', 'duration': 1776.26375, 'author': 'Reply All', }, diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 026329d3e..df7f528be 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor): 'uploader': 'takuya0301', 'uploader_id': '2698420', 'upload_date': '20131123', - 'timestamp': 1385182762, + 'timestamp': int, # timestamp is unstable 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, 'view_count': int, @@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor): 'skip': 'Requires an account', }, { # "New" HTML5 video + # md5 is unstable 'url': 'http://www.nicovideo.jp/watch/sm31464864', - 'md5': '351647b4917660986dc0fa8864085135', 'info_dict': { 'id': 'sm31464864', 'ext': 'mp4', @@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor): 'description': 
'md5:e52974af9a96e739196b2c1ca72b5feb', 'timestamp': 1498514060, 'upload_date': '20170626', - 'uploader': 'ゲス', + 'uploader': 'ゲスト', 'uploader_id': '40826363', 'thumbnail': r're:https?://.*', 'duration': 198, @@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, }, 'skip': 'Requires an account', + }, { + # Video without owner + 'url': 'http://www.nicovideo.jp/watch/sm18238488', + 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e', + 'info_dict': { + 'id': 'sm18238488', + 'ext': 'mp4', + 'title': '【実写版】ミュータントタートルズ', + 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e', + 'timestamp': 1341160408, + 'upload_date': '20120701', + 'uploader': None, + 'uploader_id': None, + 'thumbnail': r're:https?://.*', + 'duration': 5271, + 'view_count': int, + 'comment_count': int, + }, + 'skip': 'Requires an account', }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, @@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor): webpage_url = get_video_info('watch_url') or url - owner = api_data.get('owner', {}) + # Note: cannot use api_data.get('owner', {}) because owner may be set to "null" + # in the JSON, which will cause None to be returned instead of {}. + owner = try_get(api_data, lambda x: x.get('owner'), dict) or {} uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id') uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname') diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8889e4a1a..b51dcbe10 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -187,7 +187,7 @@ class PBSIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?: # Direct video URL - (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? 
| + (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) | # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player @@ -367,6 +367,10 @@ class PBSIE(InfoExtractor): { 'url': 'http://watch.knpb.org/video/2365616055/', 'only_matching': True, + }, + { + 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=', + 'only_matching': True, } ] _ERRORS = { diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py index 411fef84c..b446a02ba 100644 --- a/youtube_dl/extractor/scrippsnetworks.py +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -10,6 +10,7 @@ import re from .common import InfoExtractor from .anvato import AnvatoIE from ..utils import ( + smuggle_url, urlencode_postdata, xpath_text, ) @@ -183,5 +184,7 @@ x-api-key:%(key)s })['results'][0]['mcpId'] return self.url_result( - 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + smuggle_url( + 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + {'geo_countries': ['US']}), AnvatoIE.ie_key(), video_id=mcp_id) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4e8db240d..5aef555fb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # description description_original = video_description = get_element_by_id("eow-description", video_webpage) if video_description: + + def replace_url(m): + redir_url = compat_urlparse.urljoin(url, m.group(1)) + parsed_redir_url = compat_urllib_parse_urlparse(redir_url) + if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect': + qs = compat_parse_qs(parsed_redir_url.query) + q = qs.get('q') + if q and q[0]: + return q[0] + return redir_url + description_original = video_description = 
re.sub(r'''(?x) ]*> [^<]+\.{3}\s* - ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description) + ''', replace_url, video_description) video_description = clean_html(video_description) else: fd_mobj = re.search(r'