From bae4ce394c3b4cc61fd7012f513168b62a2b7831 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 20 Jul 2015 22:59:17 +0200
Subject: [PATCH 1/4] Use a custom downloader for merging formats

---
 youtube_dl/YoutubeDL.py           | 12 +---------
 youtube_dl/downloader/__init__.py |  4 ++++
 youtube_dl/downloader/common.py   |  3 ++-
 youtube_dl/downloader/merge.py    | 37 +++++++++++++++++++++++++++++++
 4 files changed, 44 insertions(+), 12 deletions(-)
 create mode 100644 youtube_dl/downloader/merge.py

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 00af78e06..4fa708cb5 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -71,7 +71,6 @@ from .utils import (
     write_json_file,
     write_string,
     YoutubeDLHandler,
-    prepend_extension,
     replace_extension,
     args_to_str,
     age_restricted,
@@ -1377,7 +1376,6 @@ class YoutubeDL(object):
                     return fd.download(name, info)
 
                 if info_dict.get('requested_formats') is not None:
-                    downloaded = []
                     success = True
                     merger = FFmpegMergerPP(self)
                     if not merger.available:
@@ -1420,16 +1418,8 @@ class YoutubeDL(object):
                             '[download] %s has already been downloaded and '
                             'merged' % filename)
                     else:
-                        for f in requested_formats:
-                            new_info = dict(info_dict)
-                            new_info.update(f)
-                            fname = self.prepare_filename(new_info)
-                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
-                            downloaded.append(fname)
-                            partial_success = dl(fname, new_info)
-                            success = success and partial_success
+                        success = dl(filename, info_dict)
                         info_dict['__postprocessors'] = postprocessors
-                        info_dict['__files_to_merge'] = downloaded
             else:
                 # Just a single file
                 success = dl(filename, info_dict)
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index dccc59212..7b0aca8ad 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -9,6 +9,7 @@ from .http import HttpFD
 from .rtsp import RtspFD
 from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
+from .merge import MergeFD
 
 from ..utils import (
     determine_protocol,
@@ -27,6 +28,9 @@ PROTOCOL_MAP = {
 
 def get_suitable_downloader(info_dict, params={}):
     """Get the downloader class that can handle the info dict."""
+    if info_dict.get('requested_formats') is not None:
+        return MergeFD
+
     protocol = determine_protocol(info_dict)
     info_dict['protocol'] = protocol
 
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 97e755d4b..ad971772b 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -137,7 +137,8 @@ class FileDownloader(object):
         return int(round(number * multiplier))
 
     def to_screen(self, *args, **kargs):
-        self.ydl.to_screen(*args, **kargs)
+        if not self.params.get('quiet'):
+            self.ydl.to_screen(*args, **kargs)
 
     def to_stderr(self, message):
         self.ydl.to_screen(message)
diff --git a/youtube_dl/downloader/merge.py b/youtube_dl/downloader/merge.py
new file mode 100644
index 000000000..7eadeb9b1
--- /dev/null
+++ b/youtube_dl/downloader/merge.py
@@ -0,0 +1,37 @@
+from __future__ import unicode_literals
+
+from .common import FileDownloader
+import youtube_dl
+from ..utils import prepend_extension
+
+
+class MergeFD(FileDownloader):
+    def real_download(self, filename, info_dict):
+        infos = []
+        for f in info_dict['requested_formats']:
+            new_info = dict(info_dict)
+            del new_info['requested_formats']
+            new_info.update(f)
+            fname = self.ydl.prepare_filename(new_info)
+            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+            infos.append((fname, new_info))
+        success = True
+        for fname, info in infos:
+            params = dict(self.params)
+            params.update({
+                'quiet': True,
+                'noprogress': True,
+            })
+            fd = youtube_dl.downloader.get_suitable_downloader(info, self.params)(self.ydl, params)
+
+            def hook(status):
+                self._hook_progress(status)
+
+            fd.add_progress_hook(hook)
+            self.report_destination(fname)
+            partial_success = fd.download(fname, info)
+            success = success and partial_success
+
+        info_dict['__files_to_merge'] = [fname for fname, _ in infos]
+
+        return True

From 675a966176f1e9a9a11e75b2e7a14213fbb8cd67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 21 Jul 2015 14:19:22 +0200
Subject: [PATCH 2/4] Add experimental support for downloading DASH files in
 parallel

---
 youtube_dl/__init__.py          |   1 +
 youtube_dl/downloader/common.py |   7 ++
 youtube_dl/downloader/merge.py  | 113 ++++++++++++++++++++++++++++----
 youtube_dl/options.py           |   4 ++
 4 files changed, 113 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 55b22c889..1842e9e22 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -372,6 +372,7 @@ def _real_main(argv=None):
         'external_downloader_args': external_downloader_args,
         'postprocessor_args': postprocessor_args,
         'cn_verification_proxy': opts.cn_verification_proxy,
+        'parallel_dash_downloads': opts.parallel_dash_downloads,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index ad971772b..716e0cfa7 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -14,6 +14,10 @@ from ..utils import (
 )
 
 
+class StopDownload(Exception):
+    pass
+
+
 class FileDownloader(object):
     """File Downloader class.
 
@@ -232,6 +236,9 @@ class FileDownloader(object):
         self.to_console_title('youtube-dl ' + msg)
 
     def report_progress(self, s):
+        if s.get('_skip_report_progress'):
+            return
+
         if s['status'] == 'finished':
             if self.params.get('noprogress', False):
                 self.to_screen('[download] Download completed')
diff --git a/youtube_dl/downloader/merge.py b/youtube_dl/downloader/merge.py
index 7eadeb9b1..5774dfd07 100644
--- a/youtube_dl/downloader/merge.py
+++ b/youtube_dl/downloader/merge.py
@@ -1,20 +1,19 @@
-from __future__ import unicode_literals
+from __future__ import unicode_literals, division
 
-from .common import FileDownloader
+import threading
+
+from .common import FileDownloader, StopDownload
 import youtube_dl
 from ..utils import prepend_extension
 
 
+def _join_threads(threads, timeout=None):
+    for t in threads:
+        t.join(timeout=timeout)
+
+
 class MergeFD(FileDownloader):
-    def real_download(self, filename, info_dict):
-        infos = []
-        for f in info_dict['requested_formats']:
-            new_info = dict(info_dict)
-            del new_info['requested_formats']
-            new_info.update(f)
-            fname = self.ydl.prepare_filename(new_info)
-            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
-            infos.append((fname, new_info))
+    def _normal_download(self, filename, infos):
         success = True
         for fname, info in infos:
             params = dict(self.params)
@@ -31,7 +30,97 @@ class MergeFD(FileDownloader):
             self.report_destination(fname)
             partial_success = fd.download(fname, info)
             success = success and partial_success
+        return success
 
-        info_dict['__files_to_merge'] = [fname for fname, _ in infos]
+    def _parallel_download(self, filename, infos):
+        self.report_warning('Downloading DASH formats in parallel is an experimental feature, some things may not work as expected')
+        threads = []
+        statuses = []
+        downloaders = []
+        lock = threading.Lock()
+        stop_event = threading.Event()
+        for fname, info in infos:
+            params = dict(self.params)
+            params.update({
+                'quiet': True,
+                'noprogress': True,
+            })
+            FD = youtube_dl.downloader.get_suitable_downloader(info, self.params)
+            fd = FD(self.ydl, params)
+            downloaders.append(fd)
+            status = {}
+            statuses.append(status)
+
+            def hook(s, status=status):
+                with lock:
+                    status.update(s)
+                    s['_skip_report_progress'] = True
+                    self._hook_progress(s)
+
+                    global_status = {'filename': filename}
+                    if any(s.get('status') == 'downloading' for s in statuses):
+                        global_status['status'] = 'downloading'
+                    elif all(s.get('status') == 'finished' for s in statuses):
+                        global_status['status'] = 'finished'
+                    else:
+                        global_status['status'] = None
+                    for s in statuses:
+                        for key in ['total_bytes', 'downloaded_bytes', 'eta', 'elapsed', 'speed']:
+                            if s.get(key) is not None:
+                                global_status.setdefault(key, 0)
+                                global_status[key] += s[key]
+                    # Don't call _hook_progress because it's not a real file
+                    self.report_progress(global_status)
+                if stop_event.is_set():
+                    raise StopDownload()
+
+            fd.add_progress_hook(hook)
+            self.report_destination(fname)
+
+            def dl(fd, *args):
+                fd._error = None
+                try:
+                    return fd.download(*args)
+                except StopDownload:
+                    pass
+                except Exception as err:
+                    fd._error = err
+
+            thread = threading.Thread(target=dl, args=(fd, fname, info))
+            threads.append(thread)
+        try:
+            for t in threads:
+                t.start()
+            while True:
+                # the timeout seems to be required so that the main thread can
+                # catch the exceptions in python 2.x
+                _join_threads(threads, timeout=1)
+                if not any(t.is_alive() for t in threads):
+                    break
+        except BaseException:
+            stop_event.set()
+            _join_threads(threads)
+            raise
+
+        for fd in downloaders:
+            if fd._error is not None:
+                raise fd._error
 
         return True
+
+    def real_download(self, filename, info_dict):
+        infos = []
+        for f in info_dict['requested_formats']:
+            new_info = dict(info_dict)
+            del new_info['requested_formats']
+            new_info.update(f)
+            fname = self.ydl.prepare_filename(new_info)
+            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+            infos.append((fname, new_info))
+
+        info_dict['__files_to_merge'] = [name for name, _ in infos]
+
+        if self.params.get('parallel_dash_downloads', False):
+            return self._parallel_download(filename, infos)
+        else:
+            return self._normal_download(filename, infos)
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 85365d769..eb199dfb2 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -424,6 +424,10 @@ def parseOpts(overrideArguments=None):
         '--external-downloader-args',
         dest='external_downloader_args', metavar='ARGS',
         help='Give these arguments to the external downloader')
+    downloader.add_option(
+        '--parallel-dash-downloads',
+        action='store_true', dest='parallel_dash_downloads', default=False,
+        help='(Experimental) Download DASH formats in parallel')
 
     workarounds = optparse.OptionGroup(parser, 'Workarounds')
     workarounds.add_option(

From f9775ae86e903b25cff223937113d209143f1a3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 21 Jul 2015 19:24:20 +0200
Subject: [PATCH 3/4] [steam] Add extractor for live broadcasts (#6012)

---
 youtube_dl/downloader/dash.py    | 73 ++++++++++++++++++++++++++-----
 youtube_dl/extractor/__init__.py |  5 ++-
 youtube_dl/extractor/steam.py    | 75 ++++++++++++++++++++++++++++++++
 3 files changed, 140 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index a4685d307..a51e5fc3b 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -1,9 +1,19 @@
 from __future__ import unicode_literals
 
+import itertools
 import re
+import time
+import xml.etree.ElementTree as etree
 
 from .common import FileDownloader
-from ..compat import compat_urllib_request
+from ..compat import (
+    compat_str,
+    compat_urllib_request,
+)
+from ..utils import (
+    parse_iso8601,
+    xpath_with_ns,
+)
 
 
 class DashSegmentsFD(FileDownloader):
@@ -13,9 +23,6 @@
     def real_download(self, filename, info_dict):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
-        base_url = info_dict['url']
-        segment_urls = info_dict['segment_urls']
-
         is_test = self.params.get('test', False)
         remaining_bytes = self._TEST_FILE_SIZE if is_test else None
         byte_counter = 0
@@ -34,21 +41,63 @@ class DashSegmentsFD(FileDownloader):
             outf.write(data)
             return len(data)
 
-        def combine_url(base_url, target_url):
-            if re.match(r'^https?://', target_url):
-                return target_url
-            return '%s/%s' % (base_url, target_url)
+        if not info_dict.get('is_live'):
+            base_url = info_dict['url']
+            segment_urls = info_dict['segment_urls']
+
+            def combine_url(base_url, target_url):
+                if re.match(r'^https?://', target_url):
+                    return target_url
+                return '%s/%s' % (base_url, target_url)
+
+            init_url = combine_url(base_url, info_dict['initialization_url'])
+            segment_urls = [combine_url(base_url, segment_url) for segment_url in segment_urls]
+
+        else:
+            manifest_url = info_dict['url']
+            manifest_xml = self.ydl.urlopen(manifest_url).read()
+            manifest = etree.fromstring(manifest_xml)
+            _x = lambda p: xpath_with_ns(p, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})
+            ad = [e for e in manifest.findall(_x('ns:Period/ns:AdaptationSet')) if e.attrib['id'] == info_dict['mpd_set_id']][0]
+            segment_template = ad.find(_x('ns:SegmentTemplate'))
+
+            def subs_url_template(url_template, repr_id, number=None):
+                result = url_template.replace('$RepresentationID$', repr_id)
+                if number is not None:
+                    result = result.replace('$Number$', compat_str(number))
+                return result
+
+            start_time = parse_iso8601(manifest.attrib['availabilityStartTime'])
+            segment_duration = (int(segment_template.attrib['duration']) / int(segment_template.attrib['timescale']))  # in seconds
+            first_segment = int((int(time.time()) - start_time) / segment_duration)
+            init_url = subs_url_template(segment_template.attrib['initialization'], '1')
+
+            def build_live_segment_urls():
+                for nr in itertools.count(first_segment):
+                    # We have to avoid requesting a segment before its start time
+                    expected_time = start_time + nr * segment_duration
+                    wait_time = expected_time - time.time()
+                    if wait_time > 0:
+                        time.sleep(wait_time)
+                    yield subs_url_template(segment_template.attrib['media'], '1', nr)
+            segment_urls = build_live_segment_urls()
 
         with open(tmpfilename, 'wb') as outf:
             append_url_to_file(
-                outf, combine_url(base_url, info_dict['initialization_url']),
+                outf, init_url,
                 'initialization segment')
             for i, segment_url in enumerate(segment_urls):
+                note = 'segment %d' % (i + 1)
+                if not info_dict.get('is_live'):
+                    note += ' / %d' % len(segment_urls)
                 segment_len = append_url_to_file(
-                    outf, combine_url(base_url, segment_url),
-                    'segment %d / %d' % (i + 1, len(segment_urls)),
-                    remaining_bytes)
+                    outf, segment_url, note, remaining_bytes)
                 byte_counter += segment_len
+                self._hook_progress({
+                    'status': 'downloading',
+                    'downloaded_bytes': byte_counter,
+                    'filename': filename,
+                })
                 if remaining_bytes is not None:
                     remaining_bytes -= segment_len
                     if remaining_bytes <= 0:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 50da08830..7d6423611 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -561,7 +561,10 @@ from .srf import SrfIE
 from .srmediathek import SRMediathekIE
 from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
-from .steam import SteamIE
+from .steam import (
+    SteamIE,
+    SteamBroadcastsIE,
+)
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py
index 183dcb03c..40b4eab73 100644
--- a/youtube_dl/extractor/steam.py
+++ b/youtube_dl/extractor/steam.py
@@ -5,7 +5,9 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    int_or_none,
     unescapeHTML,
+    xpath_with_ns,
 )
 
 
@@ -121,3 +123,76 @@ class SteamIE(InfoExtractor):
             raise ExtractorError('Could not find any videos')
 
         return self.playlist_result(videos, playlist_id, playlist_title)
+
+
+class SteamBroadcastsIE(InfoExtractor):
+    IE_DESC = 'Steam and Dota 2 live broadcasts'
+    _VALID_URL = r'https?://(?:www\.)?(?:steamcommunity\.com/broadcast|dota2\.com)/watch/(?P<id>\d+)'
+
+    # Only livestreams, test urls can be obtained from
+    # https://steamcommunity.com/?subsection=broadcasts or
+    # https://www.dota2.com/watch/
+    _TESTS = [
+        {
+            'url': 'http://www.dota2.com/watch/76561197986987526',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://steamcommunity.com/broadcast/watch/76561197986987526',
+            'only_matching': True,
+        },
+    ]
+
+    def _extract_dash_manifest_formats(self, manifest_url, video_id):
+        manifest = self._download_xml(manifest_url, video_id)
+
+        _x = lambda p: xpath_with_ns(p, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})
+        formats = []
+        for ad_set in manifest.findall(_x('ns:Period/ns:AdaptationSet')):
+            set_id = ad_set.attrib['id']
+            if set_id == 'game':
+                continue
+            for repr in ad_set.findall(_x('ns:Representation')):
+                repr_id = repr.attrib['id']
+                if set_id == 'audio':
+                    ext = 'm4a'
+                    vcodec = 'none'
+                    acodec = repr.attrib.get('codecs')
+                    preference = -10
+                else:
+                    ext = 'mp4'
+                    vcodec = repr.attrib.get('codecs')
+                    acodec = 'none'
+                    preference = 0
+                formats.append({
+                    'url': manifest_url,
+                    'ext': ext,
+                    'format_id': '{0}-{1}'.format(set_id, repr_id),
+                    'protocol': 'http_dash_segments',
+                    'mpd_set_id': set_id,
+                    'mpd_representation_id': repr_id,
+                    'height': int_or_none(repr.attrib.get('height')),
+                    'width': int_or_none(repr.attrib.get('width')),
+                    'vcodec': vcodec,
+                    'acodec': acodec,
+                    'preference': preference,
+                })
+        return formats
+
+    def _real_extract(self, url):
+        steamid = self._match_id(url)
+
+        broadcast_mpd_info = self._download_json('https://steamcommunity.com/broadcast/getbroadcastmpd/?steamid={0}&broadcastid=0'.format(steamid), steamid)
+        broadcast_id = broadcast_mpd_info['broadcastid']
+        broadcast_info = self._download_json('https://steamcommunity.com/broadcast/getbroadcastinfo/?steamid={0}&broadcastid={1}'.format(steamid, broadcast_id), steamid)
+
+        manifest_url = broadcast_mpd_info['url']
+        formats = self._extract_dash_manifest_formats(manifest_url, steamid)
+        self._sort_formats(formats)
+
+        return {
+            'id': steamid,
+            'title': broadcast_info['title'],
+            'formats': formats,
+            'is_live': True,
+        }

From 9fa7bb61bcda565ea72e278b28af440254eb04b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 21 Jul 2015 20:20:25 +0200
Subject: [PATCH 4/4] [downloader/common] Don't keep '.part' files for
 livestreams when 'Ctrl-C' is pressed

Rename them to the original name and return True, which allows recording
and merging DASH livestreams.
---
 youtube_dl/downloader/common.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 716e0cfa7..413bab380 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -347,7 +347,16 @@ class FileDownloader(object):
             self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
             time.sleep(sleep_interval)
 
-        return self.real_download(filename, info_dict)
+        try:
+            return self.real_download(filename, info_dict)
+        except (KeyboardInterrupt, StopDownload):
+            if info_dict.get('is_live'):
+                self.to_screen('[download] Stopping recording of livestream: {0}'.format(filename))
+                tmpfilename = self.temp_name(filename)
+                if os.path.exists(encodeFilename(tmpfilename)):
+                    self.try_rename(tmpfilename, filename)
+                return True
+            raise
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
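
A minimal usage sketch, assuming the four patches above are applied: it exercises the new code through the standard YoutubeDL embedding API. The 'parallel_dash_downloads' key and the Steam broadcast URL come straight from the patches; everything else is stock youtube-dl usage, and the flag should be treated as experimental, as PATCH 2/4 itself warns.

    # Record a Steam live broadcast: 'bestvideo+bestaudio' requests two formats,
    # which routes the job through MergeFD (PATCH 1/4); the experimental flag
    # downloads the DASH audio and video representations in parallel (PATCH 2/4);
    # pressing Ctrl-C renames the .part files so FFmpegMergerPP can still merge
    # the partial recording (PATCH 4/4).
    from youtube_dl import YoutubeDL

    ydl_opts = {
        'format': 'bestvideo+bestaudio',
        'outtmpl': '%(title)s-%(id)s.%(ext)s',
        'parallel_dash_downloads': True,  # experimental option added in PATCH 2/4
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://steamcommunity.com/broadcast/watch/76561197986987526'])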