Merge branch 'multipart_videos' of https://github.com/jaimemf/youtube-dl into multipart_videos

Conflicts: test/test_download.py youtube_dl/YoutubeDL.py youtube_dl/__init__.py youtube_dl/extractor/common.py youtube_dl/postprocessor/ffmpeg.py
2014-03-25 19:45:31 -07:00 · 2014-03-25 19:45:31 -07:00 · 58e55dbd3d
commit 58e55dbd3d
parent ad8915b729 4a247aa0c4
7 changed files with 79 additions and 23 deletions
--- a/test/helper.py
+++ b/test/helper.py
@ -111,8 +111,9 @@ def expect_info_dict(self, expected_dict, got_dict):
                u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

    # Check for the presence of mandatory fields
-    for key in ('id', 'url', 'title', 'ext'):
+    for key in ('id', 'title', 'ext'):
        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # A result must provide either a single 'url' or a non-empty list of 'parts'
+    self.assertTrue(
+        any(got_dict.get(key) for key in ('url', 'parts')),
+        'Missing mandatory field: url or parts')
    # Check for mandatory fields that are automatically set by YoutubeDL
    for key in ['webpage_url', 'extractor', 'extractor_key']:
        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -47,6 +47,7 @@ from .utils import (
    SameFileError,
    sanitize_filename,
    subtitles_filename,
+    build_part_filename,
    takewhile_inclusive,
    UnavailableVideoError,
    url_basename,
@ -990,8 +991,26 @@ class YoutubeDL(object):
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                    else:
-                        # Just a single file
-                        success = dl(filename, info_dict)
+                        parts = info_dict.get('parts', [])
+                        if not parts:
+                            success = dl(filename, info_dict)
+                        elif len(parts) == 1:
+                            info_dict.update(parts[0])
+                            success = dl(filename, info_dict)
+                        else:
+                            # We check if the final video has already been downloaded
+                            if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)):
+                                self.fd.report_file_already_downloaded(filename)
+                                success = True
+                            else:
+                                parts_success = []
+                                self.to_screen(u'[info] Downloading %s parts' % len(parts))
+                                for (i, part) in enumerate(parts):
+                                    part_info = dict(info_dict)
+                                    part_info.update(part)
+                                    part_filename = build_part_filename(filename, i)
+                                    parts_success.append(dl(part_filename, part_info))
+                                success = all(parts_success)
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error('unable to download video data: %s' % str(err))
                    return
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -95,6 +95,7 @@ from .postprocessor import (
    FFmpegExtractAudioPP,
    FFmpegEmbedSubtitlePP,
    XAttrMetadataPP,
+    FFmpegJoinVideosPP,
 )


@ -491,6 +492,8 @@ def parseOpts(overrideArguments=None):
            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
    postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)')
+    postproc.add_option('--join-parts', action='store_true', dest='joinparts', default=False,
+            help='Join the video parts if the video is splitted in different parts.')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')
    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
@ -792,6 +795,8 @@ def _real_main(argv=None):
        ydl.add_default_info_extractors()

        # PostProcessors
+        if opts.joinparts:
+            ydl.add_post_processor(FFmpegJoinVideosPP())
        # Add the metadata pp first, the other pps will copy it
        if opts.addmetadata:
            ydl.add_post_processor(FFmpegMetadataPP())
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -109,6 +109,9 @@ class InfoExtractor(object):
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    comment_count:  Number of comments on the video
+    parts:          A list of info_dicts, one for each part of the video.
+                    Each must include the url field; for an rtmp download
+                    it can also contain additional fields for rtmpdump.
    age_limit:      Age restriction for the video, as an integer (years)
    webpage_url:    The url to the video webpage, if given to youtube-dl it
                    should allow to get the same result again. (It will be set
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@ -10,7 +10,7 @@ class TudouIE(InfoExtractor):
    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
    _TESTS = [{
        u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
-        u'file': u'159448201.f4v',
+        u'file': u'2xN2duXMxmw.f4v',
        u'md5': u'140a49ed444bd22f93330985d8475fcb',
        u'info_dict': {
            u"title": u"卡马乔国足开大脚长传冲吊集锦"
@ -58,21 +58,20 @@ class TudouIE(InfoExtractor):
        # It looks like the keys are the arguments that have to be passed as
        # the hd field in the request url, we pick the higher
        quality = sorted(segments.keys())[-1]
-        parts = segments[quality]
-        result = []
-        len_parts = len(parts)
-        if len_parts > 1:
-            self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
-        for part in parts:
+        segs = segments[quality]
+        parts = []
+        len_segs = len(segs)
+        if len_segs > 1:
+            self.to_screen(u'%s: found %s parts' % (video_id, len_segs))
+        for part in segs:
            part_id = part['k']
            final_url = self._url_for_id(part_id, quality)
            ext = (final_url.split('?')[0]).split('.')[-1]
-            part_info = {'id': part_id,
-                          'url': final_url,
-                          'ext': ext,
-                          'title': title,
-                          'thumbnail': thumbnail_url,
-                          }
-            result.append(part_info)
+            parts.append({'url': final_url})

-        return result
+        return {'id': video_id,
+                'ext': ext,
+                'title': title,
+                'thumbnail': thumbnail_url,
+                'parts': parts,
+                }
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -2,6 +2,7 @@ import os
 import subprocess
 import sys
 import time
+import io


 from .common import AudioConversionError, PostProcessor
@ -14,6 +15,7 @@ from ..utils import (
    prepend_extension,
    shell_quote,
    subtitles_filename,
+    build_part_filename,
 )


@ -40,15 +42,15 @@ class FFmpegPostProcessor(PostProcessor):
    def _uses_avconv(self):
        return self._get_executable() == self._exes['avconv']

-    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, input_opts=[]):
        if not self._get_executable():
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        files_cmd = []
        for path in input_paths:
            files_cmd.extend(['-i', encodeFilename(path, True)])
-        cmd = ([self._get_executable(), '-y'] + files_cmd
-               + opts +
+        cmd = ([self._get_executable(), '-y'] +
+               input_opts + files_cmd + opts +
               [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])

        if self._downloader.params.get('verbose', False):
@ -60,8 +62,8 @@ class FFmpegPostProcessor(PostProcessor):
            msg = stderr.strip().split('\n')[-1]
            raise FFmpegPostProcessorError(msg)

-    def run_ffmpeg(self, path, out_path, opts):
-        self.run_ffmpeg_multiple_files([path], out_path, opts)
+    def run_ffmpeg(self, path, out_path, opts, input_opts=None):
+        """Run ffmpeg/avconv on the single input file `path`.
+
+        Thin wrapper around run_ffmpeg_multiple_files(): `opts` are placed
+        after the input file on the command line and the optional
+        `input_opts` before it.
+        """
+        # None instead of a shared mutable [] default; the callee
+        # concatenates lists, so it must still receive a list.
+        if input_opts is None:
+            input_opts = []
+        self.run_ffmpeg_multiple_files([path], out_path, opts, input_opts)

    def _ffmpeg_filename_argument(self, fn):
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
@ -483,3 +485,26 @@ class FFmpegMergerPP(FFmpegPostProcessor):
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
        return True, info

+
+class FFmpegJoinVideosPP(FFmpegPostProcessor):
+    """Join the separately downloaded parts of a video into a single file.
+
+    The part files are located with build_part_filename() and handed to
+    ffmpeg/avconv through a temporary list file read with '-f concat';
+    the streams are copied ('-c copy'), not re-encoded.
+    """
+
+    def run(self, information):
+        """Join the parts described by information['parts'].
+
+        Returns a (success, information) tuple. success is False when
+        ffmpeg/avconv fails; the part files are then left in place.
+        """
+        filename = information['filepath']
+        parts = information.get('parts')
+        # Nothing to join when there are no parts (None or empty) or only one
+        if not parts or len(parts) == 1:
+            return (True, information)
+        parts_files = [build_part_filename(filename, i) for i in range(len(parts))]
+        files_file = u'%s.videos' % filename
+        with io.open(encodeFilename(files_file), 'w', encoding='utf-8') as f:
+            for video in parts_files:
+                # Inside a single-quoted entry a literal ' has to be
+                # written as '\'' (close quote, escaped quote, reopen)
+                f.write(u'file \'%s\'\n' % video.replace("'", "'\\''"))
+        self._downloader.to_screen(u'[ffmpeg] Joining video parts, destination: %s' % filename)
+        try:
+            self.run_ffmpeg(files_file, filename, ['-c', 'copy'], ['-f', 'concat'])
+        except FFmpegPostProcessorError:
+            return False, information
+        finally:
+            # The list file is only scratch input: remove it on failure too
+            os.remove(encodeFilename(files_file))
+        # We have to manually remove the parts if requested
+        if not self._downloader.params.get('keepvideo', False):
+            for part_file in parts_files:
+                os.remove(encodeFilename(part_file))
+        return (True, information)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -838,6 +838,10 @@ def determine_ext(url, default_ext=u'unknown_video'):
 def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format

+def build_part_filename(final_filename, part_index):
+    """Return the filename used for part number part_index of final_filename.
+
+    The index is inserted just before the extension,
+    e.g. ('video.mp4', 0) gives 'video.0.mp4'.
+    """
+    stem_and_ext = os.path.splitext(final_filename)
+    return '%s.%d%s' % (stem_and_ext[0], part_index, stem_and_ext[1])
+
 def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or