diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 75945e38f..70989e232 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -527,6 +527,8 @@ class TestYoutubeDL(unittest.TestCase): 'ext': 'mp4', 'width': None, 'height': 1080, + 'title1': '$PATH', + 'title2': '%PATH%', } def fname(templ): @@ -545,10 +547,14 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%%'), '%') + self.assertEqual(fname('%%%%'), '%%') self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') + self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') + self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') def test_format_note(self): ydl = YoutubeDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 60ee4b7d8..8730d32ef 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -20,6 +20,7 @@ import re import shutil import subprocess import socket +import string import sys import time import tokenize @@ -674,7 +675,19 @@ class YoutubeDL(object): FORMAT_RE.format(numeric_field), r'%({0})s'.format(numeric_field), outtmpl) - filename = expand_path(outtmpl % template_dict) + # expand_path translates '%%' into '%' and '$$' into '$' + # respectively, which is not what we want since we need to keep + # '%%' intact for the template dict substitution step. Work around + # this with a boundary-like separator hack. 
+ sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)]) + outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. + filename = expand_path(outtmpl).replace(sep, '') % template_dict + # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding # to workaround encoding issues with subprocess on python2 @ Windows diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 5f6f9faef..9b001ecff 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -98,7 +98,7 @@ def write_piff_header(stream, params): if is_audio: smhd_payload = s88.pack(0) # balance - smhd_payload = u16.pack(0) # reserved + smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header else: vmhd_payload = u16.pack(0) # graphics mode @@ -126,7 +126,6 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) else: - sample_entry_payload = sample_entry_payload sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py index e48bb8972..393f381c6 100644 --- a/youtube_dl/extractor/audioboom.py +++ b/youtube_dl/extractor/audioboom.py @@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor): def from_clip(field): if clip: - clip.get(field) + return clip.get(field) audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( 'audio', webpage, 'audio url') diff --git a/youtube_dl/extractor/fivetv.py 
b/youtube_dl/extractor/fivetv.py index 15736c9fe..9f9863746 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -43,7 +43,7 @@ class FiveTVIE(InfoExtractor): 'info_dict': { 'id': 'glavnoe', 'ext': 'mp4', - 'title': 'Итоги недели с 8 по 14 июня 2015 года', + 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$', 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -70,7 +70,8 @@ class FiveTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r']+?href="([^"]+)"[^>]+?class="videoplayer"', + [r']+?class="flowplayer[^>]+?data-href="([^"]+)"', + r']+?href="([^"]+)"[^>]+?class="videoplayer"'], webpage, 'video url') title = self._og_search_title(webpage, default=None) or self._search_regex( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5e8890d41..8c2ff39d5 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2740,7 +2740,7 @@ class GenericIE(InfoExtractor): rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: return self.playlist_from_matches( - rutube_urls, ie=RutubeIE.ie_key()) + rutube_urls, video_id, video_title, ie=RutubeIE.ie_key()) # Look for WashingtonPost embeds wapo_urls = WashingtonPostIE._extract_urls(webpage) diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index 4e9eb67bf..f236a2f78 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -48,7 +48,7 @@ class KarriereVideosIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = (self._html_search_meta('title', webpage, default=None) or - self._search_regex(r'

([^<]+)

')) + self._search_regex(r'

([^<]+)

', webpage, 'video title')) video_id = self._search_regex( r'/config/video/(.+?)\.xml', webpage, 'video id') diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 74a1dc672..e89ebebe7 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -31,7 +31,7 @@ class SlideshareIE(InfoExtractor): page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) slideshare_obj = self._search_regex( - r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);', + r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);', webpage, 'slideshare object') info = json.loads(slideshare_obj) if info['slideshow']['type'] != 'video': diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 77cd271ef..4597ccb3a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -673,6 +673,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + # YouTube Red ad is not captured for creator { 'url': '__2ABJjxzNo', 'info_dict': { @@ -1649,7 +1650,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_webpage, 'license', default=None) m_music = re.search( - r']+class="title"[^>]*>\s*Music\s*\s*]*>\s*
  • (?P.+?) by (?P<creator>.+?)(?:\(.+?\))?</li', + r'''(?x) + <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s* + <ul[^>]*>\s* + <li>(?P<title>.+?) + by (?P<creator>.+?) + (?: + \(.+?\)| + <a[^>]* + (?: + \bhref=["\']/red[^>]*>| # drop possible + >\s*Listen ad-free with YouTube Red # YouTube Red ad + ) + .*? + )?</li + ''', video_webpage) if m_music: video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))