From 211bcaae3420458bdf4065ff776d96466a595668 Mon Sep 17 00:00:00 2001 From: fnord Date: Tue, 4 Aug 2015 13:39:52 -0500 Subject: [PATCH] fix format selection of formats-with-dashes-in-name Regression via 0130afb7: tokenize tokenizes a common character in format names ('-') which is not used as an operator in format selection. This mangles '-' into 'UNTOKENIZEDDASH' prior to tokenizing, then demangles. Example breakage: youtube-dl http://video.pbs.org/video/2365496883 -f mp4-baseline-16x9 [PBS] 2365496883: Downloading JSON metadata [PBS] 2365496883: Downloading recommended_encoding video url info [PBS] 2365496883: Downloading webpage [PBS] 2365496883: Downloading alternate_encoding video url info Traceback (most recent call last): File "/usr/local/bin/youtube-dl", line 9, in <module> execfile(__file__) File "/md23/youtube-dl/bin/youtube-dl", line 6, in <module> youtube_dl.main() File "/md23/youtube-dl/youtube_dl/__init__.py", line 415, in main _real_main(argv) File "/md23/youtube-dl/youtube_dl/__init__.py", line 405, in _real_main retcode = ydl.download(all_urls) File "/md23/youtube-dl/youtube_dl/YoutubeDL.py", line 1645, in download url, force_generic_extractor=self.params.get('force_generic_extractor', False), extra_info={'arg_url': url, 'failed': None}) File "/md23/youtube-dl/youtube_dl/YoutubeDL.py", line 673, in extract_info return self.process_ie_result(ie_result, download, extra_info) File "/md23/youtube-dl/youtube_dl/YoutubeDL.py", line 724, in process_ie_result return self.process_video_result(ie_result, download=download) File "/md23/youtube-dl/youtube_dl/YoutubeDL.py", line 1298, in process_video_result format_selector = self.build_format_selector(req_format) File "/md23/youtube-dl/youtube_dl/YoutubeDL.py", line 1153, in build_format_selector parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) File "/md23/youtube-dl/youtube_dl/YoutubeDL.py", line 1000, in _parse_format_selection raise syntax_error('Operator not recognized: "{0}"'.format(string), start) 
SyntaxError: Invalid format specification: Operator not recognized: "-" mp4-baseline-16x9 ^ --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c608ff91a..87f11002d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -941,7 +941,7 @@ class YoutubeDL(object): if type == getattr(tokenize, 'ENCODING', None): continue elif type in [tokenize.NAME, tokenize.NUMBER]: - current_selector = FormatSelector(SINGLE, string, []) + current_selector = FormatSelector(SINGLE, string.replace('UNTOKENIZEDDASH','-'), []) elif type == tokenize.OP: if string == ')': if not inside_group: @@ -1109,7 +1109,7 @@ class YoutubeDL(object): return selector_function(formats) return final_selector - stream = io.BytesIO(format_spec.encode('utf-8')) + stream = io.BytesIO(format_spec.replace('-','UNTOKENIZEDDASH').encode('utf-8')) try: tokens = list(compat_tokenize_tokenize(stream.readline)) except tokenize.TokenError: