Merge branch 'master' into GoogleDrive-issue-13619

2017-07-14 15:16:50 -05:00 · 2017-07-14 15:16:50 -05:00 · a37d79540e
commit a37d79540e
parent 9a2158f7e8 2583c0b54e
9 changed files with 44 additions and 10 deletions
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -527,6 +527,8 @@ class TestYoutubeDL(unittest.TestCase):
            'ext': 'mp4',
            'width': None,
            'height': 1080,
            'title1': '$PATH',
            'title2': '%PATH%',
        }
        def fname(templ):
@ -545,10 +547,14 @@ class TestYoutubeDL(unittest.TestCase):
        self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
        self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
        self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
        self.assertEqual(fname('%%'), '%')
        self.assertEqual(fname('%%%%'), '%%')
        self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
        self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
        self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
        self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
        self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
        self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
    def test_format_note(self):
        ydl = YoutubeDL()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -20,6 +20,7 @@ import re
 import shutil
 import subprocess
 import socket
 import string
 import sys
 import time
 import tokenize
@ -674,7 +675,19 @@ class YoutubeDL(object):
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)
-            filename = expand_path(outtmpl % template_dict)
+            # expand_path translates '%%' into '%' and '$$' into '$'
            # respectively, which is not what we want since we need to keep
            # '%%' intact for the template dict substitution step. Work around
            # this with a boundary-like separator hack.
            sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
--- a/youtube_dl/downloader/ism.py
+++ b/youtube_dl/downloader/ism.py
@ -98,7 +98,7 @@ def write_piff_header(stream, params):
    if is_audio:
        smhd_payload = s88.pack(0)  # balance
-        smhd_payload = u16.pack(0)  # reserved
+        smhd_payload += u16.pack(0)  # reserved
        media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
    else:
        vmhd_payload = u16.pack(0)  # graphics mode
@ -126,7 +126,6 @@ def write_piff_header(stream, params):
        if fourcc == 'AACL':
            sample_entry_box = box(b'mp4a', sample_entry_payload)
    else:
        sample_entry_payload = sample_entry_payload
        sample_entry_payload += u16.pack(0)  # pre defined
        sample_entry_payload += u16.pack(0)  # reserved
        sample_entry_payload += u32.pack(0) * 3  # pre defined
--- a/youtube_dl/extractor/audioboom.py
+++ b/youtube_dl/extractor/audioboom.py
@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor):
        def from_clip(field):
            if clip:
-                clip.get(field)
+                return clip.get(field)
        audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
            'audio', webpage, 'audio url')
--- a/youtube_dl/extractor/fivetv.py
+++ b/youtube_dl/extractor/fivetv.py
@ -43,7 +43,7 @@ class FiveTVIE(InfoExtractor):
        'info_dict': {
            'id': 'glavnoe',
            'ext': 'mp4',
-            'title': 'Итоги недели с 8 по 14 июня 2015 года',
+            'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
@ -70,7 +70,8 @@ class FiveTVIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
-            r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
+            [r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
             r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
            webpage, 'video url')
        title = self._og_search_title(webpage, default=None) or self._search_regex(
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -2740,7 +2740,7 @@ class GenericIE(InfoExtractor):
        rutube_urls = RutubeIE._extract_urls(webpage)
        if rutube_urls:
            return self.playlist_from_matches(
-                rutube_urls, ie=RutubeIE.ie_key())
+                rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
        # Look for WashingtonPost embeds
        wapo_urls = WashingtonPostIE._extract_urls(webpage)
--- a/youtube_dl/extractor/karrierevideos.py
+++ b/youtube_dl/extractor/karrierevideos.py
@ -48,7 +48,7 @@ class KarriereVideosIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        title = (self._html_search_meta('title', webpage, default=None) or
-                 self._search_regex(r'<h1 class="title">([^<]+)</h1>'))
+                 self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
        video_id = self._search_regex(
            r'/config/video/(.+?)\.xml', webpage, 'video id')
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@ -31,7 +31,7 @@ class SlideshareIE(InfoExtractor):
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        slideshare_obj = self._search_regex(
-            r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
+            r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);',
            webpage, 'slideshare object')
        info = json.loads(slideshare_obj)
        if info['slideshow']['type'] != 'video':
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -673,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            },
        },
        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
@ -1649,7 +1650,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            video_webpage, 'license', default=None)
        m_music = re.search(
-            r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
+            r'''(?x)
                <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
                <ul[^>]*>\s*
                <li>(?P<title>.+?)
                by (?P<creator>.+?)
                (?:
                    \(.+?\)|
                    <a[^>]*
                        (?:
                            \bhref=["\']/red[^>]*>|             # drop possible
                            >\s*Listen ad-free with YouTube Red # YouTube Red ad 
                        )
                    .*?
                )?</li
            ''',
            video_webpage)
        if m_music:
            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))