diff --git a/test/helper.py b/test/helper.py
index 8739f816c..984f2554a 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -111,8 +111,10 @@ def expect_info_dict(self, expected_dict, got_dict):
             u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
     # Check for the presence of mandatory fields
-    for key in ('id', 'url', 'title', 'ext'):
+    for key in ('id', 'title', 'ext'):
         self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # A video must provide either a direct url or a non-empty list of parts
+    self.assertTrue(any(got_dict.get(key) for key in ('url', 'parts')), 'Missing mandatory field: url or parts')
     # Check for mandatory fields that are automatically set by YoutubeDL
     for key in ['webpage_url', 'extractor', 'extractor_key']:
         self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ae0ec49f8..8a2af7808 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -47,6 +47,7 @@ from .utils import (
     SameFileError,
     sanitize_filename,
     subtitles_filename,
+    build_part_filename,
     takewhile_inclusive,
     UnavailableVideoError,
     url_basename,
@@ -990,8 +991,26 @@ class YoutubeDL(object):
                     info_dict['__postprocessors'] = postprocessors
                     info_dict['__files_to_merge'] = downloaded
                 else:
-                    # Just a single file
-                    success = dl(filename, info_dict)
+                    parts = info_dict.get('parts', [])
+                    if not parts:
+                        success = dl(filename, info_dict)
+                    elif len(parts) == 1:
+                        info_dict.update(parts[0])
+                        success = dl(filename, info_dict)
+                    else:
+                        # We check if the final video has already been downloaded
+                        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)):
+                            self.fd.report_file_already_downloaded(filename)
+                            success = True
+                        else:
+                            parts_success = []
+                            self.to_screen(u'[info] Downloading %s parts' % len(parts))
+                            for (i, part) in enumerate(parts):
+                                part_info = dict(info_dict)
+                                part_info.update(part)
+                                part_filename = build_part_filename(filename, i)
+                                parts_success.append(dl(part_filename, part_info))
+                            success = all(parts_success)
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self.report_error('unable to download video data: %s' % str(err))
                 return
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 6af4b8aee..6bbe9d3df 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -95,6 +95,7 @@ from .postprocessor import (
     FFmpegExtractAudioPP,
     FFmpegEmbedSubtitlePP,
     XAttrMetadataPP,
+    FFmpegJoinVideosPP,
 )
 
 
@@ -491,6 +492,8 @@ def parseOpts(overrideArguments=None):
             help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
     postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
             help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
+    postproc.add_option('--join-parts', action='store_true', dest='joinparts', default=False,
+            help='Join the video parts if the video is split into several parts.')
     postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
             help='keeps the video file on disk after the post-processing; the video is erased by default')
     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
@@ -792,6 +795,8 @@ def _real_main(argv=None):
         ydl.add_default_info_extractors()
 
         # PostProcessors
+        if opts.joinparts:
+            ydl.add_post_processor(FFmpegJoinVideosPP())
         # Add the metadata pp first, the other pps will copy it
         if opts.addmetadata:
             ydl.add_post_processor(FFmpegMetadataPP())
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 78f238f84..facfbf2f5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -109,6 +109,9 @@ class InfoExtractor(object):
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
     comment_count:  Number of comments on the video
+    parts:          A list of info_dicts, one for each part of the video.
+                    Each must include the url field; for an rtmp download
+                    it can contain additional fields for rtmpdump.
     age_limit:      Age restriction for the video, as an integer (years)
     webpage_url:    The url to the video webpage, if given to youtube-dl it
                     should allow to get the same result again. (It will be set
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py
index 7a3891b89..618f586dd 100644
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -10,7 +10,7 @@ class TudouIE(InfoExtractor):
     _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
     _TESTS = [{
         u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
-        u'file': u'159448201.f4v',
+        u'file': u'2xN2duXMxmw.f4v',
         u'md5': u'140a49ed444bd22f93330985d8475fcb',
         u'info_dict': {
             u"title": u"卡马乔国足开大脚长传冲吊集锦"
@@ -58,21 +58,20 @@ class TudouIE(InfoExtractor):
         # It looks like the keys are the arguments that have to be passed as
         # the hd field in the request url, we pick the higher
         quality = sorted(segments.keys())[-1]
-        parts = segments[quality]
-        result = []
-        len_parts = len(parts)
-        if len_parts > 1:
-            self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
-        for part in parts:
+        segs = segments[quality]
+        parts = []
+        len_segs = len(segs)
+        if len_segs > 1:
+            self.to_screen(u'%s: found %s parts' % (video_id, len_segs))
+        for part in segs:
             part_id = part['k']
             final_url = self._url_for_id(part_id, quality)
             ext = (final_url.split('?')[0]).split('.')[-1]
-            part_info = {'id': part_id,
-                          'url': final_url,
-                          'ext': ext,
-                          'title': title,
-                          'thumbnail': thumbnail_url,
-                          }
-            result.append(part_info)
+            parts.append({'url': final_url})
 
-        return result
+        return {'id': video_id,
+                'ext': ext,
+                'title': title,
+                'thumbnail': thumbnail_url,
+                'parts': parts,
+                }
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index c22f2cdc6..f7dcbe0cd 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -2,6 +2,7 @@ import os
 import subprocess
 import sys
 import time
+import io
 
 
 from .common import AudioConversionError, PostProcessor
@@ -14,6 +15,7 @@ from ..utils import (
     prepend_extension,
     shell_quote,
     subtitles_filename,
+    build_part_filename,
 )
 
 
@@ -40,15 +42,15 @@ class FFmpegPostProcessor(PostProcessor):
     def _uses_avconv(self):
         return self._get_executable() == self._exes['avconv']
 
-    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, input_opts=None):
         if not self._get_executable():
             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
 
         files_cmd = []
         for path in input_paths:
             files_cmd.extend(['-i', encodeFilename(path, True)])
-        cmd = ([self._get_executable(), '-y'] + files_cmd
-               + opts +
+        cmd = ([self._get_executable(), '-y']
+               + (input_opts or []) + files_cmd + opts +
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
         if self._downloader.params.get('verbose', False):
@@ -60,8 +62,8 @@ class FFmpegPostProcessor(PostProcessor):
             msg = stderr.strip().split('\n')[-1]
             raise FFmpegPostProcessorError(msg)
 
-    def run_ffmpeg(self, path, out_path, opts):
-        self.run_ffmpeg_multiple_files([path], out_path, opts)
+    def run_ffmpeg(self, path, out_path, opts, input_opts=None):
+        self.run_ffmpeg_multiple_files([path], out_path, opts, input_opts)
 
     def _ffmpeg_filename_argument(self, fn):
         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
@@ -483,3 +485,31 @@ class FFmpegMergerPP(FFmpegPostProcessor):
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
 
+
+class FFmpegJoinVideosPP(FFmpegPostProcessor):
+    """Join the downloaded parts of a video into a single file."""
+    def run(self, information):
+        filename = information['filepath']
+        parts = information.get('parts')
+        # Nothing to join when the video was not split into several parts
+        if not parts or len(parts) == 1:
+            return (True, information)
+        parts_files = [build_part_filename(filename, i) for (i, _) in enumerate(parts)]
+        files_file = u'%s.videos' % filename
+        with io.open(encodeFilename(files_file), 'w', encoding='utf-8') as f:
+            for video in parts_files:
+                # A quote inside the quoted path must be written as '\''
+                f.write(u'file \'%s\'\n' % video.replace(u"'", u"'\\''"))
+        self._downloader.to_screen(u'[ffmpeg] Joining video parts, destination: %s' % filename)
+        try:
+            self.run_ffmpeg(files_file, filename, ['-c', 'copy'], ['-f', 'concat'])
+        except FFmpegPostProcessorError:
+            return False, information
+        finally:
+            # Remove the temporary list file even if the join failed
+            os.remove(encodeFilename(files_file))
+        # We have to manually remove the parts if requested
+        if not self._downloader.params.get('keepvideo', False):
+            for part_file in parts_files:
+                os.remove(encodeFilename(part_file))
+        return (True, information)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 29c9b1a4c..d5dcc3a66 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -838,6 +838,11 @@ def determine_ext(url, default_ext=u'unknown_video'):
 def subtitles_filename(filename, sub_lang, sub_format):
     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 
+def build_part_filename(final_filename, part_index):
+    """Return the filename used for part number part_index of final_filename."""
+    (name, ext) = os.path.splitext(final_filename)
+    return '%s.%d%s' % (name, part_index, ext)
+
 def date_from_str(date_str):
     """
     Return a datetime object from a string in the format YYYYMMDD or