Merge pull request #2 from rg3/master

update 1st May
This commit is contained in:
dntt1 2016-05-01 11:57:32 +05:30
commit b4c763e660
7 changed files with 331 additions and 185 deletions

View File

@ -168,3 +168,4 @@ José Joaquín Atria
Viťas Strádal Viťas Strádal
Kagami Hiiragi Kagami Hiiragi
Philip Huppert Philip Huppert
blahgeek

View File

@ -1,13 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_iso8601,
qualities,
unified_strdate,
) )
@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video', 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
'md5': '3a1eda8f3a29515d27f5adb967d7e740', 'md5': '3a1eda8f3a29515d27f5adb967d7e740',
'info_dict': { 'info_dict': {
'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor', 'id': '1839',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Introduction to Processor Design', 'title': 'Introduction to Processor Design',
'description': 'md5:80be298773966f66d56cb11260b879af', 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
'upload_date': '20131228', 'upload_date': '20131228',
'duration': 3660, 'timestamp': 1388188800,
'duration': 3710,
} }
}, { }, {
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, display_id)
event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
if self._downloader.params.get('prefer_free_formats'):
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
else:
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
title = self._html_search_regex(
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
description = self._html_search_regex(
r'(?s)<h3>About</h3>(.+?)<h3>',
webpage, 'description', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
webpage, 'upload date', fatal=False))
view_count = int_or_none(self._html_search_regex(
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
webpage, 'view count', fatal=False))
duration = parse_duration(self._html_search_regex(
r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
webpage, 'duration', fatal=False, group='duration'))
matches = re.finditer(r'''(?xs)
<(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
<(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
(?:
.*?
<a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
)?''', webpage)
formats = [] formats = []
for m in matches: for recording in event_data.get('recordings', []):
format = m.group('format') recording_url = recording.get('recording_url')
format_id = self._search_regex( if not recording_url:
r'.*/([a-z0-9_-]+)/[^/]*$', continue
m.group('http_url'), 'format id', default=None) language = recording.get('language')
if format_id: folder = recording.get('folder')
format_id = m.group('lang') + '-' + format_id format_id = None
vcodec = 'h264' if 'h264' in format_id else ( if language:
'none' if format_id in ('mp3', 'opus') else None format_id = language
if folder:
if language:
format_id += '-' + folder
else:
format_id = folder
vcodec = 'h264' if 'h264' in folder else (
'none' if folder in ('mp3', 'opus') else None
) )
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'format': format, 'url': recording_url,
'language': m.group('lang'), 'width': int_or_none(recording.get('width')),
'url': m.group('http_url'), 'height': int_or_none(recording.get('height')),
'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
'language': language,
'vcodec': vcodec, 'vcodec': vcodec,
'preference': preference(format_id),
}) })
if m.group('torrent_url'):
formats.append({
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
'format': '%s (torrent)' % format,
'proto': 'torrent',
'format_note': '(unsupported; will just download the .torrent file)',
'vcodec': vcodec,
'preference': -100 + preference(format_id),
'url': m.group('torrent_url'),
})
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._html_search_regex(
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
return { return {
'id': video_id, 'id': event_id,
'title': title, 'display_id': display_id,
'description': description, 'title': event_data['title'],
'thumbnail': thumbnail, 'description': event_data.get('description'),
'view_count': view_count, 'thumbnail': event_data.get('thumb_url'),
'upload_date': upload_date, 'timestamp': parse_iso8601(event_data.get('date')),
'duration': duration, 'duration': int_or_none(event_data.get('length')),
'tags': event_data.get('tags'),
'formats': formats, 'formats': formats,
} }

View File

@ -33,6 +33,7 @@ class DiscoveryIE(InfoExtractor):
'duration': 156, 'duration': 156,
'timestamp': 1302032462, 'timestamp': 1302032462,
'upload_date': '20110405', 'upload_date': '20110405',
'uploader_id': '103207',
}, },
'params': { 'params': {
'skip_download': True, # requires ffmpeg 'skip_download': True, # requires ffmpeg
@ -54,7 +55,11 @@ class DiscoveryIE(InfoExtractor):
'upload_date': '20140725', 'upload_date': '20140725',
'timestamp': 1406246400, 'timestamp': 1406246400,
'duration': 116, 'duration': 116,
'uploader_id': '103207',
}, },
'params': {
'skip_download': True, # requires ffmpeg
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -66,13 +71,19 @@ class DiscoveryIE(InfoExtractor):
entries = [] entries = []
for idx, video_info in enumerate(info['playlist']): for idx, video_info in enumerate(info['playlist']):
formats = self._extract_m3u8_formats( subtitles = []
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls', caption_url = video_info.get('captionsUrl')
note='Download m3u8 information for video %d' % (idx + 1)) if caption_url:
self._sort_formats(formats) subtitles = {
'en': [{
'url': caption_url,
}]
}
entries.append({ entries.append({
'_type': 'url_transparent',
'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
'id': compat_str(video_info['id']), 'id': compat_str(video_info['id']),
'formats': formats,
'title': video_info['title'], 'title': video_info['title'],
'description': video_info.get('description'), 'description': video_info.get('description'),
'duration': parse_duration(video_info.get('video_length')), 'duration': parse_duration(video_info.get('video_length')),
@ -80,6 +91,7 @@ class DiscoveryIE(InfoExtractor):
'thumbnail': video_info.get('thumbnailURL'), 'thumbnail': video_info.get('thumbnailURL'),
'alt_title': video_info.get('secondary_title'), 'alt_title': video_info.get('secondary_title'),
'timestamp': parse_iso8601(video_info.get('publishedDate')), 'timestamp': parse_iso8601(video_info.get('publishedDate')),
'subtitles': subtitles,
}) })
return self.playlist_result(entries, display_id, video_title) return self.playlist_result(entries, display_id, video_title)

View File

@ -724,7 +724,10 @@ from .svt import (
from .swrmediathek import SWRMediathekIE from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE from .syfy import SyfyIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE from .tagesschau import (
TagesschauPlayerIE,
TagesschauIE,
)
from .tapely import TapelyIE from .tapely import TapelyIE
from .tass import TassIE from .tass import TassIE
from .tdslifeway import TDSLifewayIE from .tdslifeway import TDSLifewayIE

View File

@ -125,10 +125,12 @@ class RtlNlIE(InfoExtractor):
try: try:
# Find hls format with the same width and height corresponding # Find hls format with the same width and height corresponding
# to progressive format and copy metadata from it. # to progressive format and copy metadata from it.
f = next(f for f in formats f = next(f for f in formats if f.get('height') == height)
if f.get('width') == width and f.get('height') == height).copy() # hls formats may have invalid width
f.update(pg_format(format_id, width, height)) f['width'] = width
pg_formats.append(f) f_copy = f.copy()
f_copy.update(pg_format(format_id, width, height))
pg_formats.append(f_copy)
except StopIteration: except StopIteration:
# Missing hls format does mean that no progressive format with # Missing hls format does mean that no progressive format with
# such width and height exists either. # such width and height exists either.

View File

@ -4,42 +4,178 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_filesize from ..utils import (
determine_ext,
js_to_json,
parse_iso8601,
parse_filesize,
)
class TagesschauPlayerIE(InfoExtractor):
    """Extractor for tagesschau.de ``~player`` pages (audio and video).

    Formats are read from the JavaScript media descriptors embedded in the
    player page; title and thumbnail come from the page's Open Graph tags.
    """
    IE_NAME = 'tagesschau:player'
    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'

    _TESTS = [{
        'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
        'md5': '8d09548d5c15debad38bee3a4d15ca21',
        'info_dict': {
            'id': '179517',
            'ext': 'mp4',
            'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
            'thumbnail': 're:^https?:.*\.jpg$',
            'formats': 'mincount:6',
        },
    }, {
        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
        'md5': '76e6eec6ebd40740671cf0a2c88617e5',
        'info_dict': {
            'id': '29417',
            'ext': 'mp3',
            'title': 'Trabi - Bye, bye Rennpappe',
            'thumbnail': 're:^https?:.*\.jpg$',
            'formats': 'mincount:2',
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
        'only_matching': True,
    }]

    # Extra metadata merged into a format dict, keyed by the `quality`
    # value of the media descriptor found in the player page.
    _FORMATS = {
        'xs': {'quality': 0},
        's': {'width': 320, 'height': 180, 'quality': 1},
        'm': {'width': 512, 'height': 288, 'quality': 2},
        'l': {'width': 960, 'height': 540, 'quality': 3},
        'xl': {'width': 1280, 'height': 720, 'quality': 4},
        'xxl': {'quality': 5},
    }

    def _extract_via_api(self, kind, video_id):
        """Build an info dict from the tagesschau.de multimedia JSON API.

        `kind` is the media kind used in the API path ('audio' or 'video'
        per _VALID_URL) and `video_id` the numeric id.  Returns a dict with
        'id', 'title', 'timestamp' and 'formats'.

        NOTE(review): the only call site visible here is commented out in
        _real_extract, so this helper appears to be unused at the moment.
        """
        info = self._download_json(
            'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
            video_id)
        title = info['headline']

        formats = []
        for media in info['mediadata']:
            for format_id, format_url in media.items():
                if determine_ext(format_url) == 'm3u8':
                    # HLS manifests expand into several formats
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls'))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'vcodec': 'none' if kind == 'audio' else None,
                    })
        self._sort_formats(formats)

        timestamp = parse_iso8601(info.get('date'))

        return {
            'id': video_id,
            'title': title,
            'timestamp': timestamp,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract title, thumbnail and formats from a ``~player`` page.

        Returns an info dict with 'id', 'title', 'thumbnail' and 'formats'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # API-based extraction is intentionally disabled:
        # kind = mobj.group('kind').lower()
        # if kind == 'video':
        #     return self._extract_via_api(kind, video_id)

        # JSON api does not provide some audio formats (e.g. ogg) thus
        # extracting audio via webpage

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage).strip()
        formats = []

        for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
            media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
            if not media:
                continue
            src = media.get('src')
            if not src:
                # Skip only this descriptor; a bare `return` here would make
                # the whole extraction yield None and drop every format
                # collected so far.
                continue
            quality = media.get('quality')
            kind = media.get('type', '').split('/')[0]
            ext = determine_ext(src)
            f = {
                'url': src,
                'format_id': '%s_%s' % (quality, ext) if quality else ext,
                'ext': ext,
                'vcodec': 'none' if kind == 'audio' else None,
            }
            # Add width/height/quality rank when the quality label is known
            f.update(self._FORMATS.get(quality, {}))
            formats.append(f)

        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
class TagesschauIE(InfoExtractor): class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html' _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html', 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
'md5': '917a228bc7df7850783bc47979673a09', 'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
'info_dict': { 'info_dict': {
'id': '102143', 'id': 'video-102143',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
'description': 'md5:171feccd9d9b3dd54d05d501568f6359', 'description': '18.07.2015 20:10 Uhr',
'thumbnail': 're:^https?:.*\.jpg$', 'thumbnail': 're:^https?:.*\.jpg$',
}, },
}, { }, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633', 'md5': '3c54c1f6243d279b706bde660ceec633',
'info_dict': { 'info_dict': {
'id': '5727', 'id': 'ts-5727',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:695c01bfd98b7e313c501386327aea59',
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
'description': 'md5:695c01bfd98b7e313c501386327aea59',
'thumbnail': 're:^https?:.*\.jpg$', 'thumbnail': 're:^https?:.*\.jpg$',
}, },
}, { }, {
'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html', # exclusive audio
'md5': 'aef45de271c4bf0a5db834aa40bf774c', 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
'md5': '76e6eec6ebd40740671cf0a2c88617e5',
'info_dict': { 'info_dict': {
'id': '18407', 'id': 'audio-29417',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich', 'title': 'Trabi - Bye, bye Rennpappe',
'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich', 'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
'thumbnail': 're:^https?:.*\.jpg$', 'thumbnail': 're:^https?:.*\.jpg$',
}, },
}, {
# audio in article
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
'md5': 'e0916c623e85fc1d2b26b78f299d3958',
'info_dict': {
'id': 'bnd-303',
'ext': 'mp3',
'title': 'Viele Baustellen für neuen BND-Chef',
'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
'thumbnail': 're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
'info_dict': {
'id': 'afd-parteitag-135',
'title': 'Möchtegern-Underdog mit Machtanspruch',
},
'playlist_count': 2,
}, { }, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html', 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
'only_matching': True, 'only_matching': True,
@ -61,88 +197,108 @@ class TagesschauIE(InfoExtractor):
}, { }, {
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html', 'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.tagesschau.de/100sekunden/index.html',
'only_matching': True,
}, {
# playlist article with collapsing sections
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
'only_matching': True,
}] }]
_FORMATS = { @classmethod
's': {'width': 256, 'height': 144, 'quality': 1}, def suitable(cls, url):
'm': {'width': 512, 'height': 288, 'quality': 2}, return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
'l': {'width': 960, 'height': 544, 'quality': 3},
} def _extract_formats(self, download_text, media_kind):
links = re.finditer(
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
download_text)
formats = []
for l in links:
link_url = l.group('url')
if not link_url:
continue
format_id = self._search_regex(
r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
default=determine_ext(link_url))
format = {
'format_id': format_id,
'url': l.group('url'),
'format_name': l.group('name'),
}
title = l.group('title')
if title:
if media_kind.lower() == 'video':
m = re.match(
r'''(?x)
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
(?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
(?P<vbr>[0-9]+)kbps&\#10;
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
title)
if m:
format.update({
'format_note': m.group('audio_desc'),
'vcodec': m.group('vcodec'),
'width': int(m.group('width')),
'height': int(m.group('height')),
'abr': int(m.group('abr')),
'vbr': int(m.group('vbr')),
'filesize_approx': parse_filesize(m.group('filesize_approx')),
})
else:
m = re.match(
r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
title)
if m:
format.update({
'format_note': '%s, %s' % (m.group('format'), m.group('note')),
'vcodec': 'none',
'abr': int(m.group('abr')),
})
formats.append(format)
self._sort_formats(formats)
return formats
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') or mobj.group('path')
display_id = video_id.lstrip('-') display_id = video_id.lstrip('-')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
player_url = self._html_search_meta( title = self._html_search_regex(
'twitter:player', webpage, 'player URL', default=None) r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
if player_url: webpage, 'title', default=None) or self._og_search_title(webpage)
playerpage = self._download_webpage(
player_url, display_id, 'Downloading player page')
formats = [] DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
for media in re.finditer(
r'''(?x) webpage_type = self._og_search_property('type', webpage, default=None)
(?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url) if webpage_type == 'website': # Article
,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type) entries = []
(?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))? for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
''', playerpage): r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
url = media.group('url') webpage), 1):
type_ = media.group('type') entries.append({
ext = media.group('ext') 'id': '%s-%d' % (display_id, num),
res = media.group('quality') 'title': '%s' % entry_title,
f = { 'formats': self._extract_formats(download_text, media_kind),
'format_id': '%s_%s' % (res, ext) if res else ext, })
'url': url, if len(entries) > 1:
'ext': ext, return self.playlist_result(entries, display_id, title)
'vcodec': 'none' if type_ == 'audio' else None, formats = entries[0]['formats']
} else: # Assume single video
f.update(self._FORMATS.get(res, {}))
formats.append(f)
thumbnail = self._og_search_thumbnail(playerpage)
title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip()
else:
download_text = self._search_regex( download_text = self._search_regex(
r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>', DOWNLOAD_REGEX, webpage, 'download links', group='links')
webpage, 'download links') media_kind = self._search_regex(
links = re.finditer( DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', formats = self._extract_formats(download_text, media_kind)
download_text) thumbnail = self._og_search_thumbnail(webpage)
formats = [] description = self._html_search_regex(
for l in links: r'(?s)<p class="teasertext">(.*?)</p>',
format_id = self._search_regex( webpage, 'description', default=None)
r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
format = {
'format_id': format_id,
'url': l.group('url'),
'format_name': l.group('name'),
}
m = re.match(
r'''(?x)
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
(?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
(?P<vbr>[0-9]+)kbps&\#10;
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
l.group('title'))
if m:
format.update({
'format_note': m.group('audio_desc'),
'vcodec': m.group('vcodec'),
'width': int(m.group('width')),
'height': int(m.group('height')),
'abr': int(m.group('abr')),
'vbr': int(m.group('vbr')),
'filesize_approx': parse_filesize(m.group('filesize_approx')),
})
formats.append(format)
thumbnail = self._og_search_thumbnail(webpage)
description = self._html_search_regex(
r'(?s)<p class="teasertext">(.*?)</p>',
webpage, 'description', default=None)
title = self._html_search_regex(
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -389,23 +389,30 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor):
def run(self, info): def run(self, info):
metadata = {} metadata = {}
if info.get('title') is not None:
metadata['title'] = info['title'] def add(meta_list, info_list=None):
if info.get('upload_date') is not None: if not info_list:
metadata['date'] = info['upload_date'] info_list = meta_list
if info.get('artist') is not None: if not isinstance(meta_list, (list, tuple)):
metadata['artist'] = info['artist'] meta_list = (meta_list,)
elif info.get('uploader') is not None: if not isinstance(info_list, (list, tuple)):
metadata['artist'] = info['uploader'] info_list = (info_list,)
elif info.get('uploader_id') is not None: for info_f in info_list:
metadata['artist'] = info['uploader_id'] if info.get(info_f) is not None:
if info.get('description') is not None: for meta_f in meta_list:
metadata['description'] = info['description'] metadata[meta_f] = info[info_f]
metadata['comment'] = info['description'] break
if info.get('webpage_url') is not None:
metadata['purl'] = info['webpage_url'] add('title', ('track', 'title'))
if info.get('album') is not None: add('date', 'upload_date')
metadata['album'] = info['album'] add(('description', 'comment'), 'description')
add('purl', 'webpage_url')
add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
add('genre')
add('album')
add('album_artist')
add('disc', 'disc_number')
if not metadata: if not metadata:
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')