Merge branch 'master'

2017-10-17 16:30:06 -05:00 · 2017-10-17 16:30:06 -05:00 · d473007473
commit d473007473
parent f1a5b546e8 6f3b4a98c9
14 changed files with 116 additions and 25 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15.1**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.10.15
+[debug] youtube-dl version 2017.10.15.1
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@ -1,3 +1,15 @@
+version 2017.10.15.1
+
+Core
+* [downloader/hls] Ignore anvato ad fragments (#14496)
+* [downloader/fragment] Output ad fragment count
+
+Extractors
+* [scrippsnetworks:watch] Bypass geo restriction
++ [anvato] Add ability to bypass geo restriction
+* [redditr] Fix extraction for URLs with query (#14495)
+
+
 version 2017.10.15

 Core
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@ -117,9 +117,15 @@ class FragmentFD(FileDownloader):
    def _prepare_frag_download(self, ctx):
        if 'live' not in ctx:
            ctx['live'] = False
+        if not ctx['live']:
+            total_frags_str = '%d' % ctx['total_frags']
+            ad_frags = ctx.get('ad_frags', 0)
+            if ad_frags:
+                total_frags_str += ' (not including %d ad)' % ad_frags
+        else:
+            total_frags_str = 'unknown (live)'
        self.to_screen(
-            '[%s] Total fragments: %s'
-            % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
+            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
@ -152,7 +158,7 @@ class FragmentFD(FileDownloader):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                if ctx['fragment_index'] > 0 and resume_len == 0:
-                    self.report_error(
+                    self.report_warning(
                        'Inconsistent state of incomplete fragment download. '
                        'Restarting from the beginning...')
                    ctx['fragment_index'] = resume_len = 0
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@ -75,15 +75,30 @@ class HlsFD(FragmentFD):
                fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

-        total_frags = 0
+        def anvato_ad(s):
+            return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+
+        media_frags = 0
+        ad_frags = 0
+        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
-            if line and not line.startswith('#'):
-                total_frags += 1
+            if not line:
+                continue
+            if line.startswith('#'):
+                if anvato_ad(line):
+                    ad_frags += 1
+                    ad_frag_next = True
+                continue
+            if ad_frag_next:
+                ad_frag_next = False
+                continue
+            media_frags += 1

        ctx = {
            'filename': filename,
-            'total_frags': total_frags,
+            'total_frags': media_frags,
+            'ad_frags': ad_frags,
        }

        self._prepare_and_start_frag_download(ctx)
@ -101,10 +116,14 @@ class HlsFD(FragmentFD):
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        frag_index = 0
+        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
+                    if ad_frag_next:
+                        ad_frag_next = False
+                        continue
                    frag_index += 1
                    if frag_index <= ctx['fragment_index']:
                        continue
@ -175,6 +194,8 @@ class HlsFD(FragmentFD):
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }
+                elif anvato_ad(line):
+                    ad_frag_next = True

        self._finish_frag_download(ctx)

--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@ -18,6 +18,7 @@ from ..utils import (
    int_or_none,
    strip_jsonp,
    unescapeHTML,
+    unsmuggle_url,
 )


@ -275,6 +276,9 @@ class AnvatoIE(InfoExtractor):
            anvplayer_data['accessKey'], anvplayer_data['video'])

    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
        mobj = re.match(self._VALID_URL, url)
        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
        if access_key not in self._ANVACK_TABLE:
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
    compat_parse_qs,
+    compat_str,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
@ -15,6 +16,7 @@ from ..utils import (
    int_or_none,
    NO_DEFAULT,
    qualities,
+    try_get,
    unified_strdate,
 )

@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor):
        info = self._download_json(json_url, video_id)
        player_info = info['videoJsonPlayer']

-        vsr = player_info['VSR']
-
+        vsr = try_get(player_info, lambda x: x['VSR'], dict)
        if not vsr:
-            raise ExtractorError(
-                'Video %s is not available' % player_info.get('VID') or video_id,
-                expected=True)
+            error = None
+            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+                error = try_get(
+                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
+            if not error:
+                error = 'Video %s is not available' % player_info.get('VID') or video_id
+            raise ExtractorError(error, expected=True)

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
                            'tbr': int_or_none(bitrate),
                            'ext': link.get('FileFormat'),
                            'vcodec': 'none' if kind == 'AudioResource' else None,
+                            'preference': preference,
                        })
                subtitles_list = asset.get('SubtitlesList')
                if isinstance(subtitles_list, list):
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@ -15,7 +15,7 @@ from ..utils import (


 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
    _TESTS = [{
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
        'md5': '39d486f046212d8e1b911c52ab4691f8',
@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor):
    }, {
        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
        'only_matching': True,
+    }, {
+        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/megaphone.py
+++ b/youtube_dl/extractor/megaphone.py
@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor):
            'id': 'GLT9749789991',
            'ext': 'mp3',
            'title': '#97 What Kind Of Idiot Gets Phished?',
-            'thumbnail': 're:^https://.*\.png.*$',
+            'thumbnail': r're:^https://.*\.png.*$',
            'duration': 1776.26375,
            'author': 'Reply All',
        },
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
-            'timestamp': 1385182762,
+            'timestamp': int,  # timestamp is unstable
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
            'view_count': int,
@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
        'skip': 'Requires an account',
    }, {
        # "New" HTML5 video
+        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm31464864',
-        'md5': '351647b4917660986dc0fa8864085135',
        'info_dict': {
            'id': 'sm31464864',
            'ext': 'mp4',
@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
            'timestamp': 1498514060,
            'upload_date': '20170626',
-            'uploader': 'ゲス',
+            'uploader': 'ゲスト',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'duration': 198,
@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
            'comment_count': int,
        },
        'skip': 'Requires an account',
+    }, {
+        # Video without owner
+        'url': 'http://www.nicovideo.jp/watch/sm18238488',
+        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+        'info_dict': {
+            'id': 'sm18238488',
+            'ext': 'mp4',
+            'title': '【実写版】ミュータントタートルズ',
+            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+            'timestamp': 1341160408,
+            'upload_date': '20120701',
+            'uploader': None,
+            'uploader_id': None,
+            'thumbnail': r're:https?://.*',
+            'duration': 5271,
+            'view_count': int,
+            'comment_count': int,
+        },
+        'skip': 'Requires an account',
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor):

        webpage_url = get_video_info('watch_url') or url

-        owner = api_data.get('owner', {})
+        # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+        # in the JSON, which will cause None to be returned instead of {}.
+        owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
        uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
        uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')

--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@ -187,7 +187,7 @@ class PBSIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://
        (?:
           # Direct video URL
-           (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
+           (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
           # Article with embedded player (or direct video)
           (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
           # Player
@ -367,6 +367,10 @@ class PBSIE(InfoExtractor):
        {
            'url': 'http://watch.knpb.org/video/2365616055/',
            'only_matching': True,
+        },
+        {
+            'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
+            'only_matching': True,
        }
    ]
    _ERRORS = {
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ b/youtube_dl/extractor/scrippsnetworks.py
@ -10,6 +10,7 @@ import re
 from .common import InfoExtractor
 from .anvato import AnvatoIE
 from ..utils import (
+    smuggle_url,
    urlencode_postdata,
    xpath_text,
 )
@ -183,5 +184,7 @@ x-api-key:%(key)s
            })['results'][0]['mcpId']

        return self.url_result(
-            'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+            smuggle_url(
+                'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+                {'geo_countries': ['US']}),
            AnvatoIE.ie_key(), video_id=mcp_id)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # description
        description_original = video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
+
+            def replace_url(m):
+                redir_url = compat_urlparse.urljoin(url, m.group(1))
+                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+                    qs = compat_parse_qs(parsed_redir_url.query)
+                    q = qs.get('q')
+                    if q and q[0]:
+                        return q[0]
+                return redir_url
+
            description_original = video_description = re.sub(r'''(?x)
                <a\s+
                    (?:[a-zA-Z-]+="[^"]*"\s+)*?
@ -1630,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    class="[^"]*"[^>]*>
                [^<]+\.{3}\s*
                </a>
-            ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
+            ''', replace_url, video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2017.10.15'
+__version__ = '2017.10.15.1'