From a0b8bd12e7122b4eb3323c7d2b6cb3682f1ca0ab Mon Sep 17 00:00:00 2001 From: e00E Date: Wed, 7 Oct 2015 19:24:54 +0200 Subject: [PATCH 1/4] Make the native hls downloader use disk space more efficiently It now saves the url of the most recently completed segment to figure out from where it has to resume. Before, it kept ALL segments which resulted in using double the disk space necessary. --- youtube_dl/downloader/hls.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index a62d2047b..e9b3b0642 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -72,8 +72,16 @@ class NativeHlsFD(FragmentFD): self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) manifest = self.ydl.urlopen(man_url).read() + last_downloaded_segment_filename = filename + ".last_downloaded_segment" + last_downloaded_segment = None + if os.path.isfile(last_downloaded_segment_filename): + segment_file = open(last_downloaded_segment_filename, 'r') + last_downloaded_segment = segment_file.readline().strip() + segment_file.close() + s = manifest.decode('utf-8', 'ignore') fragment_urls = [] + arrived_at_last_downloaded_segment = (last_downloaded_segment is None) for line in s.splitlines(): line = line.strip() if line and not line.startswith('#'): @@ -81,7 +89,10 @@ class NativeHlsFD(FragmentFD): line if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) - fragment_urls.append(segment_url) + if arrived_at_last_downloaded_segment: + fragment_urls.append(segment_url) + elif segment_url == last_downloaded_segment: + arrived_at_last_downloaded_segment = True # We only download the first fragment during the test if self.params.get('test', False): break @@ -93,7 +104,6 @@ class NativeHlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) - frags_filenames = [] for i, frag_url in enumerate(fragment_urls): frag_filename = '%s-Frag%d' % 
(ctx['tmpfilename'], i) success = ctx['dl'].download(frag_filename, {'url': frag_url}) @@ -102,11 +112,15 @@ class NativeHlsFD(FragmentFD): down, frag_sanitized = sanitize_open(frag_filename, 'rb') ctx['dest_stream'].write(down.read()) down.close() - frags_filenames.append(frag_sanitized) + os.remove(encodeFilename(frag_sanitized)) + segments_file = open(last_downloaded_segment_filename, 'w') + segments_file.write(frag_url + "\n") + segments_file.close() + self._finish_frag_download(ctx) - for frag_file in frags_filenames: - os.remove(encodeFilename(frag_file)) + if last_downloaded_segment is not None: + os.remove(last_downloaded_segment_filename) return True From 831c7c446933aea6325b893870e5417d7c4cfc26 Mon Sep 17 00:00:00 2001 From: e00E Date: Fri, 9 Oct 2015 13:47:07 +0200 Subject: [PATCH 2/4] Fixed problems reported by dstftw: Used encodeFilename where appropriate. We are now saving the number of the segment instead of its url in case the url changes over time. Fixed the progress report by editing fragment.py to be aware of continuing. --- youtube_dl/downloader/fragment.py | 21 +++++++++++++-------- youtube_dl/downloader/hls.py | 30 ++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 5a64b29ee..e5be00b87 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -21,11 +21,11 @@ class FragmentFD(FileDownloader): A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). 
""" - def _prepare_and_start_frag_download(self, ctx): - self._prepare_frag_download(ctx) - self._start_frag_download(ctx) + def _prepare_and_start_frag_download(self, ctx, continue_dl=False, continue_fragment=None): + self._prepare_frag_download(ctx, continue_dl) + self._start_frag_download(ctx, continue_fragment) - def _prepare_frag_download(self, ctx): + def _prepare_frag_download(self, ctx, continue_dl=False): self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags'])) self.report_destination(ctx['filename']) dl = HttpQuietDownloader( @@ -40,21 +40,26 @@ class FragmentFD(FileDownloader): } ) tmpfilename = self.temp_name(ctx['filename']) - dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb') + dest_stream, tmpfilename = sanitize_open(tmpfilename, 'ab' if continue_dl else 'wb') ctx.update({ 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, }) - def _start_frag_download(self, ctx): + def _start_frag_download(self, ctx, continue_fragment=None): + # continue_fragment is the last fragment that was already downloaded + # when continuing an old download or None when not continuing + total_frags = ctx['total_frags'] + downloaded_bytes = 0 if continue_fragment is None else os.path.getsize(ctx['tmpfilename']) + frag_index = 0 if continue_fragment is None else continue_fragment + 1 # This dict stores the download progress, it's updated by the progress # hook state = { 'status': 'downloading', - 'downloaded_bytes': 0, - 'frag_index': 0, + 'downloaded_bytes': downloaded_bytes, + 'frag_index': frag_index, 'frag_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index e9b3b0642..6f68d4685 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -72,16 +72,20 @@ class NativeHlsFD(FragmentFD): self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) manifest = self.ydl.urlopen(man_url).read() - 
last_downloaded_segment_filename = filename + ".last_downloaded_segment" + last_downloaded_segment_filename = encodeFilename(filename + ".last_downloaded_segment") last_downloaded_segment = None if os.path.isfile(last_downloaded_segment_filename): segment_file = open(last_downloaded_segment_filename, 'r') - last_downloaded_segment = segment_file.readline().strip() + try: + last_downloaded_segment = int(segment_file.readline().strip()) + except ValueError: + pass segment_file.close() s = manifest.decode('utf-8', 'ignore') fragment_urls = [] arrived_at_last_downloaded_segment = (last_downloaded_segment is None) + current_fragment = 0 for line in s.splitlines(): line = line.strip() if line and not line.startswith('#'): @@ -91,21 +95,28 @@ class NativeHlsFD(FragmentFD): else compat_urlparse.urljoin(man_url, line)) if arrived_at_last_downloaded_segment: fragment_urls.append(segment_url) - elif segment_url == last_downloaded_segment: - arrived_at_last_downloaded_segment = True + else: + if current_fragment == last_downloaded_segment: + arrived_at_last_downloaded_segment = True # We only download the first fragment during the test if self.params.get('test', False): break + current_fragment += 1 + + skipped_fragments = ( + last_downloaded_segment + 1 + if last_downloaded_segment is not None + else 0) ctx = { 'filename': filename, - 'total_frags': len(fragment_urls), + 'total_frags': skipped_fragments + len(fragment_urls), } - self._prepare_and_start_frag_download(ctx) + self._prepare_and_start_frag_download(ctx, continue_dl=True, continue_fragment=last_downloaded_segment) for i, frag_url in enumerate(fragment_urls): - frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], skipped_fragments + i) success = ctx['dl'].download(frag_filename, {'url': frag_url}) if not success: return False @@ -114,13 +125,12 @@ class NativeHlsFD(FragmentFD): down.close() os.remove(encodeFilename(frag_sanitized)) segments_file = 
open(last_downloaded_segment_filename, 'w') - segments_file.write(frag_url + "\n") + segments_file.write(str(skipped_fragments + i) + '\n') segments_file.close() self._finish_frag_download(ctx) - if last_downloaded_segment is not None: - os.remove(last_downloaded_segment_filename) + os.remove(last_downloaded_segment_filename) return True From 13b0e7617d48d243e9854b43b6035b421e2982f4 Mon Sep 17 00:00:00 2001 From: e00E Date: Sat, 10 Oct 2015 23:43:03 +0200 Subject: [PATCH 3/4] Use ctx instead of function parameters --- youtube_dl/downloader/fragment.py | 19 ++++++++----------- youtube_dl/downloader/hls.py | 4 +++- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index e5be00b87..8ac150d4f 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -21,11 +21,11 @@ class FragmentFD(FileDownloader): A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). """ - def _prepare_and_start_frag_download(self, ctx, continue_dl=False, continue_fragment=None): - self._prepare_frag_download(ctx, continue_dl) - self._start_frag_download(ctx, continue_fragment) + def _prepare_and_start_frag_download(self, ctx): + self._prepare_frag_download(ctx) + self._start_frag_download(ctx) - def _prepare_frag_download(self, ctx, continue_dl=False): + def _prepare_frag_download(self, ctx): self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags'])) self.report_destination(ctx['filename']) dl = HttpQuietDownloader( @@ -40,20 +40,17 @@ class FragmentFD(FileDownloader): } ) tmpfilename = self.temp_name(ctx['filename']) - dest_stream, tmpfilename = sanitize_open(tmpfilename, 'ab' if continue_dl else 'wb') + dest_stream, tmpfilename = sanitize_open(tmpfilename, 'ab' if ctx.get('continue_dl') else 'wb') ctx.update({ 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, }) - def _start_frag_download(self, ctx, continue_fragment=None): 
- # continue_fragment is the last fragment that was already downloaded - # when continuing an old download or None when not continuing - + def _start_frag_download(self, ctx): total_frags = ctx['total_frags'] - downloaded_bytes = 0 if continue_fragment is None else os.path.getsize(ctx['tmpfilename']) - frag_index = 0 if continue_fragment is None else continue_fragment + 1 + downloaded_bytes = os.path.getsize(ctx['tmpfilename']) if ctx.get('continue_dl') else 0 + frag_index = ctx['continue_fragment'] + 1 if ctx.get('continue_fragment') else 0 # This dict stores the download progress, it's updated by the progress # hook state = { diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 6f68d4685..0841ca43a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -111,9 +111,11 @@ class NativeHlsFD(FragmentFD): ctx = { 'filename': filename, 'total_frags': skipped_fragments + len(fragment_urls), + 'continue_dl': True, + 'continue_fragment': last_downloaded_segment } - self._prepare_and_start_frag_download(ctx, continue_dl=True, continue_fragment=last_downloaded_segment) + self._prepare_and_start_frag_download(ctx) for i, frag_url in enumerate(fragment_urls): frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], skipped_fragments + i) From 54d31e514d4d9b77f6effaea0f02b3e98e7d7f48 Mon Sep 17 00:00:00 2001 From: e00E Date: Sun, 1 Nov 2015 15:35:29 +0100 Subject: [PATCH 4/4] Fix stdout with new native hls --- youtube_dl/downloader/fragment.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 8ac150d4f..34c27b349 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -49,7 +49,10 @@ class FragmentFD(FileDownloader): def _start_frag_download(self, ctx): total_frags = ctx['total_frags'] - downloaded_bytes = os.path.getsize(ctx['tmpfilename']) if ctx.get('continue_dl') else 0 + try: + downloaded_bytes = 
os.path.getsize(ctx['tmpfilename']) if ctx.get('continue_dl') else 0 + except os.error as e: + downloaded_bytes = 0 frag_index = ctx['continue_fragment'] + 1 if ctx.get('continue_fragment') else 0 # This dict stores the download progress, it's updated by the progress # hook