[liveleak] extract multiple videos

This commit is contained in:
aeph6Ee0 2015-09-15 14:28:03 +02:00
parent 31208a07c2
commit 0b0fb6fd42

View File

@ -53,9 +53,26 @@ class LiveLeakIE(InfoExtractor):
}
}]
# Number of suffixed entries handed out so far; incremented by
# _video_count() each time it produces an id suffix.
# NOTE(review): nothing visible in this chunk resets the counter between
# extractions - confirm whether one extractor instance handles several pages.
video_count = 0
def _video_count(self):
self.video_count += 1
if self.video_count == 1:
return ''
else:
return '-' + str(self.video_count-1)
# Removing '.h264_*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see
# https://github.com/rg3/youtube-dl/pull/4768)
def _get_orig_video_url(self, url):
return re.sub(r'\.h264_.+?\.mp4', '', url)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
entries = list() # collect all found videos
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
@ -65,55 +82,94 @@ class LiveLeakIE(InfoExtractor):
r'you confirm that you are ([0-9]+) years and over.',
webpage, 'age limit', default=None))
# extracts native video #1 (single video, maybe multiple formats)
sources_raw = self._search_regex(
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
if sources_raw is None:
alt_source = self._search_regex(
r'(file: ".*?"),', webpage, 'video URL', default=None)
if alt_source:
sources_raw = '[{ %s}]' % alt_source
else:
# Maybe an embed?
embed_url = self._search_regex(
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
'url': embed_url,
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
}
if sources_raw:
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
sources = json.loads(sources_json)
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
sources = json.loads(sources_json)
formats = [{
'format_id': '%s' % i,
'format_note': s.get('label'),
'url': s['file'],
} for i, s in enumerate(sources)]
for i, s in enumerate(sources):
orig_url = self._get_orig_video_url(s['file'])
if s['file'] != orig_url:
formats.append({
'format_id': 'original-%s' % i,
'format_note': s.get('label'),
'url': orig_url,
'preference': 1,
})
self._sort_formats(formats)
formats = [{
'format_id': '%s' % i,
'format_note': s.get('label'),
'url': s['file'],
} for i, s in enumerate(sources)]
for i, s in enumerate(sources):
# Removing '.h264_*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see
# https://github.com/rg3/youtube-dl/pull/4768)
orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file'])
if s['file'] != orig_url:
entries.append({
'id': page_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'formats': formats,
'age_limit': age_limit,
})
# extracts native videos #2 (maybe multiple videos, single format)
sources = re.findall(r'(?s)jwplayer\("file_[0-9a-f]+"\).+?file: "(.*?)"', webpage)
for url in sources:
formats = [{
'format_id': '0',
'format_note': 'standard quality (with logo)',
'url': url,
}]
orig_url = self._get_orig_video_url(url)
if orig_url != url:
formats.append({
'format_id': 'original-%s' % i,
'format_note': s.get('label'),
'format_id': '1',
'format_note': 'high quality (no logo)',
'url': orig_url,
'preference': 1,
})
self._sort_formats(formats)
entries.append({
'id': page_id + self._video_count(),
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'formats': formats,
'age_limit': age_limit,
})
return {
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'formats': formats,
'age_limit': age_limit,
}
# collect embedded videos:
embed_urls = list()
# prochan.com:
embed_prochan = (re.findall(
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
webpage))
if len(embed_prochan) > 0:
for embed in embed_prochan:
embed_urls.append(embed)
# add all collected embed urls
for embed_url in embed_urls:
entries.append({
'_type': 'url_transparent',
'id': page_id + self._video_count(),
'url': embed_url,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
})
if len(entries) == 0:
raise ExtractorError('No videos found')
if len(entries) == 1:
return entries[0]
else:
return {
'_type': 'multi_video',
'id': page_id,
'entries': entries,
}