Refactor fragments interface and dash segments downloader
- Eliminate segment_urls and initialization_url
+ Introduce manifest_url (a manifest may contain unfragmented data; in this case url is used for the direct media URL and manifest_url for the manifest itself)
* Rewrite dashsegments downloader to use fragments data
* Improve generic mpd extraction
This commit is contained in:
		
							parent
							
								
									21d21b0c72
								
							
						
					
					
						commit
						86f4d14f81
					
				| @ -1,7 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import os | ||||
| import re | ||||
| 
 | ||||
| from .fragment import FragmentFD | ||||
| from ..compat import compat_urllib_error | ||||
| @ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD): | ||||
|     FD_NAME = 'dashsegments' | ||||
| 
 | ||||
|     def real_download(self, filename, info_dict): | ||||
|         base_url = info_dict['url'] | ||||
|         segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls'] | ||||
|         initialization_url = info_dict.get('initialization_url') | ||||
|         segments = info_dict['fragments'][:1] if self.params.get( | ||||
|             'test', False) else info_dict['fragments'] | ||||
| 
 | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'total_frags': len(segment_urls) + (1 if initialization_url else 0), | ||||
|             'total_frags': len(segments), | ||||
|         } | ||||
| 
 | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
| 
 | ||||
|         def combine_url(base_url, target_url): | ||||
|             if re.match(r'^https?://', target_url): | ||||
|                 return target_url | ||||
|             return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) | ||||
| 
 | ||||
|         segments_filenames = [] | ||||
| 
 | ||||
|         fragment_retries = self.params.get('fragment_retries', 0) | ||||
|         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) | ||||
| 
 | ||||
|         def process_segment(segment, tmp_filename, fatal): | ||||
|             target_url, segment_name = segment | ||||
|         def process_segment(segment, tmp_filename, num): | ||||
|             segment_url = segment['url'] | ||||
|             segment_name = 'Frag%d' % num | ||||
|             target_filename = '%s-%s' % (tmp_filename, segment_name) | ||||
|             # In DASH, the first segment contains necessary headers to | ||||
|             # generate a valid MP4 file, so always abort for the first segment | ||||
|             fatal = num == 0 or not skip_unavailable_fragments | ||||
|             count = 0 | ||||
|             while count <= fragment_retries: | ||||
|                 try: | ||||
|                     success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) | ||||
|                     success = ctx['dl'].download(target_filename, {'url': segment_url}) | ||||
|                     if not success: | ||||
|                         return False | ||||
|                     down, target_sanitized = sanitize_open(target_filename, 'rb') | ||||
| @ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD): | ||||
|                 return False | ||||
|             return True | ||||
| 
 | ||||
|         segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] | ||||
|         segments_to_download.extend([ | ||||
|             (segment_url, 'Seg%d' % i) | ||||
|             for i, segment_url in enumerate(segment_urls)]) | ||||
| 
 | ||||
|         for i, segment in enumerate(segments_to_download): | ||||
|             # In DASH, the first segment contains necessary headers to | ||||
|             # generate a valid MP4 file, so always abort for the first segment | ||||
|             fatal = i == 0 or not skip_unavailable_fragments | ||||
|             if not process_segment(segment, ctx['tmpfilename'], fatal): | ||||
|         for i, segment in enumerate(segments): | ||||
|             if not process_segment(segment, ctx['tmpfilename'], i): | ||||
|                 return False | ||||
| 
 | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
| @ -86,9 +86,10 @@ class InfoExtractor(object): | ||||
|                     from worst to best quality. | ||||
| 
 | ||||
|                     Potential fields: | ||||
|                     * url        Mandatory. The URL of the video file or URL of | ||||
|                                  the manifest file in case of fragmented media | ||||
|                                  (DASH, hls, hds). | ||||
|                     * url        Mandatory. The URL of the video file | ||||
|                     * manifest_url | ||||
|                                  The URL of the manifest file in case of | ||||
|                                  fragmented media (DASH, hls, hds) | ||||
|                     * ext        Will be calculated from URL if missing | ||||
|                     * format     A human-readable description of the format | ||||
|                                  ("mp4 container with h264/opus"). | ||||
| @ -1528,9 +1529,10 @@ class InfoExtractor(object): | ||||
|         mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() | ||||
| 
 | ||||
|         return self._parse_mpd_formats( | ||||
|             compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) | ||||
|             compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, | ||||
|             formats_dict=formats_dict, mpd_url=mpd_url) | ||||
| 
 | ||||
|     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): | ||||
|     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): | ||||
|         """ | ||||
|         Parse formats from MPD manifest. | ||||
|         References: | ||||
| @ -1654,6 +1656,7 @@ class InfoExtractor(object): | ||||
|                         f = { | ||||
|                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, | ||||
|                             'url': base_url, | ||||
|                             'manifest_url': mpd_url, | ||||
|                             'ext': mimetype2ext(mime_type), | ||||
|                             'width': int_or_none(representation_attrib.get('width')), | ||||
|                             'height': int_or_none(representation_attrib.get('height')), | ||||
| @ -1682,14 +1685,6 @@ class InfoExtractor(object): | ||||
|                                 if 'total_number' not in representation_ms_info and 'segment_duration': | ||||
|                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) | ||||
|                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) | ||||
|                                 representation_ms_info['segment_urls'] = [ | ||||
|                                     media_template % { | ||||
|                                         'Number': segment_number, | ||||
|                                         'Bandwidth': representation_attrib.get('bandwidth'), | ||||
|                                     } | ||||
|                                     for segment_number in range( | ||||
|                                         representation_ms_info['start_number'], | ||||
|                                         representation_ms_info['total_number'] + representation_ms_info['start_number'])] | ||||
|                                 representation_ms_info['fragments'] = [{ | ||||
|                                     'url': media_template % { | ||||
|                                         'Number': segment_number, | ||||
| @ -1703,7 +1698,6 @@ class InfoExtractor(object): | ||||
|                                 # $Number*$ or $Time$ in media template with S list available | ||||
|                                 # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg | ||||
|                                 # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 | ||||
|                                 representation_ms_info['segment_urls'] = [] | ||||
|                                 representation_ms_info['fragments'] = [] | ||||
|                                 segment_time = 0 | ||||
|                                 segment_d = None | ||||
| @ -1715,7 +1709,6 @@ class InfoExtractor(object): | ||||
|                                         'Bandwidth': representation_attrib.get('bandwidth'), | ||||
|                                         'Number': segment_number, | ||||
|                                     } | ||||
|                                     representation_ms_info['segment_urls'].append(segment_url) | ||||
|                                     representation_ms_info['fragments'].append({ | ||||
|                                         'url': segment_url, | ||||
|                                         'duration': float_or_none(segment_d, representation_ms_info['timescale']), | ||||
| @ -1745,17 +1738,15 @@ class InfoExtractor(object): | ||||
|                                         'duration': float_or_none(s['d'], representation_ms_info['timescale']), | ||||
|                                     }) | ||||
|                             representation_ms_info['fragments'] = fragments | ||||
|                         if 'segment_urls' in representation_ms_info: | ||||
|                         # NB: MPD manifest may contain direct URLs to unfragmented media. | ||||
|                         # No fragments key is present in this case. | ||||
|                         if 'fragments' in representation_ms_info: | ||||
|                             f.update({ | ||||
|                                 'segment_urls': representation_ms_info['segment_urls'], | ||||
|                                 'fragments': [], | ||||
|                                 'protocol': 'http_dash_segments', | ||||
|                             }) | ||||
|                             if 'initialization_url' in representation_ms_info: | ||||
|                                 initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) | ||||
|                                 f.update({ | ||||
|                                     'initialization_url': initialization_url, | ||||
|                                 }) | ||||
|                                 if not f.get('url'): | ||||
|                                     f['url'] = initialization_url | ||||
|                                 f['fragments'].append({'url': initialization_url}) | ||||
|  | ||||
| @ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor): | ||||
|                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id) | ||||
|             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): | ||||
|                 info_dict['formats'] = self._parse_mpd_formats( | ||||
|                     doc, video_id, mpd_base_url=url.rpartition('/')[0]) | ||||
|                     doc, video_id, | ||||
|                     mpd_base_url=full_response.geturl().rpartition('/')[0], | ||||
|                     mpd_url=url) | ||||
|                 self._sort_formats(info_dict['formats']) | ||||
|                 return info_dict | ||||
|             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user