[tagesschau] Fix audio support
This commit is contained in:
		
							parent
							
								
									4c1b2e5c0e
								
							
						
					
					
						commit
						1a2b377cc2
					
				| @ -4,7 +4,10 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import parse_filesize | from ..utils import ( | ||||||
|  |     determine_ext, | ||||||
|  |     parse_filesize, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TagesschauIE(InfoExtractor): | class TagesschauIE(InfoExtractor): | ||||||
| @ -82,37 +85,54 @@ class TagesschauIE(InfoExtractor): | |||||||
|         'xxl': {'quality': 5}, |         'xxl': {'quality': 5}, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     def _extract_formats(self, download_text): |     def _extract_formats(self, download_text, media_kind): | ||||||
|         links = re.finditer( |         links = re.finditer( | ||||||
|             r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', |             r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', | ||||||
|             download_text) |             download_text) | ||||||
|         formats = [] |         formats = [] | ||||||
|         for l in links: |         for l in links: | ||||||
|  |             link_url = l.group('url') | ||||||
|  |             if not link_url: | ||||||
|  |                 continue | ||||||
|             format_id = self._search_regex( |             format_id = self._search_regex( | ||||||
|                 r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID') |                 r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID', | ||||||
|  |                 default=determine_ext(link_url)) | ||||||
|             format = { |             format = { | ||||||
|                 'format_id': format_id, |                 'format_id': format_id, | ||||||
|                 'url': l.group('url'), |                 'url': l.group('url'), | ||||||
|                 'format_name': l.group('name'), |                 'format_name': l.group('name'), | ||||||
|             } |             } | ||||||
|             m = re.match( |             title = l.group('title') | ||||||
|                 r'''(?x) |             if title: | ||||||
|                     Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10; |                 if media_kind.lower() == 'video': | ||||||
|                     (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10; |                     m = re.match( | ||||||
|                     (?P<vbr>[0-9]+)kbps&\#10; |                         r'''(?x) | ||||||
|                     Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10; |                             Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10; | ||||||
|                     Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''', |                             (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10; | ||||||
|                 l.group('title')) |                             (?P<vbr>[0-9]+)kbps&\#10; | ||||||
|             if m: |                             Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10; | ||||||
|                 format.update({ |                             Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''', | ||||||
|                     'format_note': m.group('audio_desc'), |                         title) | ||||||
|                     'vcodec': m.group('vcodec'), |                     if m: | ||||||
|                     'width': int(m.group('width')), |                         format.update({ | ||||||
|                     'height': int(m.group('height')), |                             'format_note': m.group('audio_desc'), | ||||||
|                     'abr': int(m.group('abr')), |                             'vcodec': m.group('vcodec'), | ||||||
|                     'vbr': int(m.group('vbr')), |                             'width': int(m.group('width')), | ||||||
|                     'filesize_approx': parse_filesize(m.group('filesize_approx')), |                             'height': int(m.group('height')), | ||||||
|                 }) |                             'abr': int(m.group('abr')), | ||||||
|  |                             'vbr': int(m.group('vbr')), | ||||||
|  |                             'filesize_approx': parse_filesize(m.group('filesize_approx')), | ||||||
|  |                         }) | ||||||
|  |                 else: | ||||||
|  |                     m = re.match( | ||||||
|  |                         r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)', | ||||||
|  |                         title) | ||||||
|  |                     if m: | ||||||
|  |                         format.update({ | ||||||
|  |                             'format_note': '%s, %s' % (m.group('format'), m.group('note')), | ||||||
|  |                             'vcodec': 'none', | ||||||
|  |                             'abr': int(m.group('abr')), | ||||||
|  |                         }) | ||||||
|             formats.append(format) |             formats.append(format) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|         return formats |         return formats | ||||||
| @ -154,23 +174,26 @@ class TagesschauIE(InfoExtractor): | |||||||
|             title = self._html_search_regex( |             title = self._html_search_regex( | ||||||
|                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title') |                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title') | ||||||
| 
 | 
 | ||||||
|             DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>' |             DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>' | ||||||
| 
 | 
 | ||||||
|             webpage_type = self._og_search_property('type', webpage, default=None) |             webpage_type = self._og_search_property('type', webpage, default=None) | ||||||
|             if webpage_type == 'website':  # Article |             if webpage_type == 'website':  # Article | ||||||
|                 entries = [] |                 entries = [] | ||||||
|                 for num, (entry_title, download_text) in enumerate(re.findall( |                 for num, (entry_title, media_kind, download_text) in enumerate(re.findall( | ||||||
|                         r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX, |                         r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX, | ||||||
|                         webpage)): |                         webpage)): | ||||||
|                     entries.append({ |                     entries.append({ | ||||||
|                         'id': display_id, |                         'id': display_id, | ||||||
|                         'title': '%s-%d' % (entry_title, num), |                         'title': '%s-%d' % (entry_title, num), | ||||||
|                         'formats': self._extract_formats(download_text), |                         'formats': self._extract_formats(download_text, media_kind), | ||||||
|                     }) |                     }) | ||||||
|                 return self.playlist_result(entries, display_id, title) |                 return self.playlist_result(entries, display_id, title) | ||||||
|             else:  # Assume single video |             else:  # Assume single video | ||||||
|                 download_text = self._search_regex(DOWNLOAD_REGEX, webpage, 'download links') |                 download_text = self._search_regex( | ||||||
|                 formats = self._extract_formats(download_text) |                     DOWNLOAD_REGEX, webpage, 'download links', group='links') | ||||||
|  |                 media_kind = self._search_regex( | ||||||
|  |                     DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind') | ||||||
|  |                 formats = self._extract_formats(download_text, media_kind) | ||||||
|                 thumbnail = self._og_search_thumbnail(webpage) |                 thumbnail = self._og_search_thumbnail(webpage) | ||||||
|                 description = self._html_search_regex( |                 description = self._html_search_regex( | ||||||
|                     r'(?s)<p class="teasertext">(.*?)</p>', |                     r'(?s)<p class="teasertext">(.*?)</p>', | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user