Add support for the Clappr
player https://github.com/clappr/clappr
New extractor: Vidlox
This commit is contained in:
parent
40a051fa9f
commit
9dc48d44b5
@ -2480,6 +2480,85 @@ class InfoExtractor(object):
|
|||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _find_clappr_data(self, webpage, video_id=None, transform_source=js_to_json):
    """
    Find Clappr.Player data

    Locates the options object passed to a ``new Clappr.Player({...})``
    call in *webpage* and decodes it.

    http://clappr.github.io/classes/Player.html#method_constructor

    webpage          -- page text to search.
    video_id         -- forwarded to _parse_json.
    transform_source -- forwarded to _parse_json (defaults to js_to_json).

    Returns the decoded options as a dict, or None when no constructor
    call is found, the options cannot be parsed, or they do not decode
    to a dict.
    """
    # The dot in "Clappr.Player" is escaped so it only matches a literal
    # dot, and optional whitespace is tolerated around the parentheses.
    # (?s) lets the options span several lines without stripping newlines
    # from the page first: a line break is what terminates a `//` comment,
    # so joining lines would change the meaning of the matched JavaScript.
    mobj = re.search(
        r'(?s)new\s+Clappr\.Player\s*\(\s*(?P<json>{.+?})\s*\)\s*;',
        webpage)
    if not mobj:
        return None

    try:
        clappr_data = self._parse_json(
            mobj.group('json'),
            video_id=video_id,
            transform_source=transform_source)
    except ExtractorError:
        # Best effort: unparsable player options are treated as "not found".
        return None

    return clappr_data if isinstance(clappr_data, dict) else None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_clappr_data(self, clappr_data, video_id=None, require_title=True,
                       m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
    """
    Parse Clappr player data

    http://clappr.github.io/classes/Player.html#method_constructor

    clappr_data   -- dict of options given to the Clappr.Player constructor.
    video_id      -- stored as 'id' and forwarded to the format helpers.
    require_title -- accepted for signature compatibility; not used here,
                     the title is only set when the player data has one.
    m3u8_id, mpd_id, rtmp_params, base_url
                  -- forwarded to _extract_url_list_formats; base_url is
                     also used to resolve relative subtitle URLs.

    Returns an info dict with 'id', 'subtitles' and 'formats', plus
    'thumbnail' and 'title' when the player data provides them.
    Errors raised by _extract_url_list_formats propagate to the caller.
    """
    info_dict = {
        'id': video_id,
        'subtitles': {},
    }

    # `sources` (a list) wins over the single `source` option. Missing or
    # null entries are dropped so that no None "URL" reaches the format
    # extraction.
    sources = clappr_data.get('sources') or [clappr_data.get('source')]
    if not isinstance(sources, (list, tuple)):
        sources = [sources]
    sources = [source for source in sources if source]

    info_dict['formats'] = self._extract_url_list_formats(
        sources, video_id=video_id, m3u8_id=m3u8_id, mpd_id=mpd_id,
        rtmp_params=rtmp_params, base_url=base_url)

    thumbnail = clappr_data.get('poster')
    if thumbnail:
        info_dict['thumbnail'] = thumbnail

    # Title from `chromecast` plugin https://github.com/deaathh/sdasdas
    chromecast = clappr_data.get('chromecast')
    if isinstance(chromecast, dict) and chromecast.get('title'):
        info_dict['title'] = chromecast['title']

    # Subtitles:
    # https://github.com/clappr/clappr/blob/master/doc/BUILTIN_PLUGINS.md#playback-configuration
    tracks = clappr_data.get('externalTracks')
    playback = clappr_data.get('playback')
    if not tracks and isinstance(playback, dict):
        tracks = playback.get('externalTracks')
    for track in tracks or []:
        if not isinstance(track, dict):
            continue
        # A track without an explicit kind is treated as subtitles.
        if track.get('kind', 'subtitles') != 'subtitles':
            continue
        src = track.get('src')
        if not src:
            continue
        lang = (track.get('lang') or track.get('language')
                or track.get('label') or 'undefined')
        info_dict['subtitles'].setdefault(lang, []).append({
            'url': compat_urlparse.urljoin(base_url, src),
            'ext': determine_ext(src),
        })

    # https://github.com/JMVTechnology/Clappr-Subtitle
    subtitle = clappr_data.get('subtitle')
    if subtitle:
        if isinstance(subtitle, dict):
            src = subtitle.get('src')
            lang = subtitle.get('lang') or subtitle.get('label')
        else:
            # A bare URL carries no language information; reset lang so a
            # value left over from the externalTracks loop is not reused.
            src = subtitle
            lang = None
        if src:
            src = compat_urlparse.urljoin(base_url, src)
            ext = determine_ext(src)
            if not lang:
                # Fall back to the file name, stripped of the video id and
                # of the extension.
                lang = src.split('/')[-1]
                if video_id and video_id in lang:
                    lang = lang.replace(
                        '%s_' % video_id, '').replace(
                        video_id, '').replace('.%s' % ext, '')
                lang = lang or 'undefined'
            info_dict['subtitles'].setdefault(lang, []).append({
                'url': src,
                'ext': ext,
            })

    return info_dict
|
||||||
|
|
||||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
query = compat_urlparse.urlparse(url).query
|
query = compat_urlparse.urlparse(url).query
|
||||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||||
@ -2533,6 +2612,54 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _extract_url_list_formats(self, sources, video_id=None,
                              m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
    """
    Transform ["url1", "url2", {source: <>, mimeType: <>}] to formats.

    Each entry of *sources* is either a media URL or a dict with a
    'source' URL and an optional 'mimeType'. Knows m3u8, mpd, smil and
    f4m manifests, which are expanded with the matching _extract_*_formats
    helper; anything else becomes a single direct format.

    sources     -- iterable of URLs and/or {source, mimeType} dicts.
    video_id    -- forwarded to the download helpers.
    m3u8_id     -- forwarded to the m3u8 and f4m helpers.
    mpd_id      -- forwarded to the mpd helper.
    rtmp_params -- accepted for signature compatibility; not used here.
    base_url    -- when set, source URLs are resolved against it.

    Returns a non-empty list of format dicts.
    Raises ExtractorError('Source not found') when no format was produced.
    """
    formats = []
    # format_id counts every entry of `sources`, including skipped ones,
    # so ids stay aligned with positions in the player configuration.
    for format_id, source in enumerate(sources):
        # The media source URL, or {source: <>, mimeType: <>}
        if isinstance(source, dict):
            source_url = source.get('source')
            mime = source.get('mimeType')
        else:
            source_url = source
            mime = None

        if not source_url:
            # Nothing to download for this entry.
            continue

        if base_url:
            source_url = compat_urlparse.urljoin(base_url, source_url)
        ext = mimetype2ext(mime) or determine_ext(source_url, 'mp4')

        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id=m3u8_id, fatal=False, preference=1))
        elif ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                source_url, video_id, mpd_id=mpd_id, fatal=False))
        elif ext == 'smil':
            formats.extend(self._extract_smil_formats(
                source_url, video_id, fatal=False))
        elif ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                source_url, video_id, m3u8_id=m3u8_id, fatal=False))
        else:
            # Probe the file size; it is used to rank direct formats.
            urlh = self._request_webpage(
                source_url, video_id,
                note="Checking format %d information" % format_id,
                fatal=False)
            size = None
            # The non-fatal request may yield a false value on failure,
            # and the server may omit or garble Content-Length.
            if urlh:
                try:
                    size = int(urlh.headers.get('Content-Length'))
                except (TypeError, ValueError):
                    size = None

            fmt = {
                'url': source_url,
                'ext': ext,
                'format_id': "%d" % format_id,
            }
            if size is not None:
                fmt['filesize'] = size
                # One preference point per 10 MiB: bigger files rank higher.
                fmt['preference'] = size // (10 * 1024 * 1024)
            formats.append(fmt)

    if not formats:
        raise ExtractorError('Source not found', expected=True, video_id=video_id)
    return formats
|
||||||
|
|
||||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
|
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
|
||||||
|
@ -1266,6 +1266,7 @@ from .viewlift import (
|
|||||||
ViewLiftEmbedIE,
|
ViewLiftEmbedIE,
|
||||||
)
|
)
|
||||||
from .viewster import ViewsterIE
|
from .viewster import ViewsterIE
|
||||||
|
from .vidlox import VidloxIE
|
||||||
from .viidea import ViideaIE
|
from .viidea import ViideaIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
|
@ -2060,6 +2060,16 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip': 'TODO: fix nested playlists processing in tests',
|
'skip': 'TODO: fix nested playlists processing in tests',
|
||||||
},
|
},
|
||||||
# {
|
# {
|
||||||
|
# # Clappr.Player({})
|
||||||
|
# 'url': 'http://demo.teleosmedia.com/mosaic/',
|
||||||
|
# 'md5': "TODO",
|
||||||
|
# 'info_dict': {
|
||||||
|
# 'id': 'mosaic',
|
||||||
|
# 'title': 'video',
|
||||||
|
# 'ext': 'mp4'
|
||||||
|
# },
|
||||||
|
# },
|
||||||
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
# # http://schema.org/VideoObject
|
# # http://schema.org/VideoObject
|
||||||
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||||
@ -3118,6 +3128,13 @@ class GenericIE(InfoExtractor):
|
|||||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||||
return merge_dicts(info, info_dict)
|
return merge_dicts(info, info_dict)
|
||||||
|
|
||||||
|
# Clappr.player()
|
||||||
|
clappr_dict = self._find_clappr_data(webpage, video_id)
|
||||||
|
if clappr_dict:
|
||||||
|
info = self._parse_clappr_data(clappr_dict,
|
||||||
|
video_id=video_id, base_url=url)
|
||||||
|
return merge_dicts(info, info_dict)
|
||||||
|
|
||||||
# Video.js embed
|
# Video.js embed
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
|
r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
|
||||||
|
75
youtube_dl/extractor/vidlox.py
Normal file
75
youtube_dl/extractor/vidlox.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .openload import PhantomJSwrapper
|
||||||
|
|
||||||
|
|
||||||
|
class VidloxIE(InfoExtractor):
    """Extractor for Vidlox pages, which embed a Clappr player."""

    _VALID_URL = r'https?://(?:www\.)?vidlox\.(?:me|tv)/(?:embed-)?(?P<id>[0-9a-z]+)(?:\.html)?'

    _TESTS = [{
        'url': 'https://vidlox.me/5tq733o3wj1d',
        'md5': 'f780592146ad0458679064de891f3e3f',
        'info_dict': {
            'id': '5tq733o3wj1d',
            'ext': 'mp4',
            'title': r're:big buck bunny 1080p surround',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': {
                'Spanish': [{
                    'ext': 'srt',
                }],
            }
        }
    }, {
        'url': 'https://vidlox.me/embed-bs2nk6dgqio1.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the Vidlox video at *url*.

        Raises ExtractorError when the page reports a missing file or when
        no Clappr player data can be found in the rendered page.
        """
        video_id = self._match_id(url)
        page_url = "https://vidlox.me/%s" % video_id

        # Download the plain page first for a couple of cheap checks, so a
        # dead link is reported before PhantomJS is needed.
        webpage = self._download_webpage(
            page_url, video_id).replace("\n", "").replace("\t", "")
        if 'File not found' in webpage:
            raise ExtractorError('File not found', expected=True, video_id=video_id)

        title = None
        if 'This video can be watched as embed only.' in webpage:
            # Extract the title here, then switch to the embed page. The
            # downloaded HTML is dropped so that it is not handed to
            # PhantomJS for the embed URL.
            title = self._html_search_regex(
                r'<title[^>]*?>(?P<title>.+?)\s*</title>', webpage, 'title').replace('Watch ', '', 1)
            webpage = None
            page_url = "https://vidlox.me/embed-%s.html" % video_id

        # Execute the page's JavaScript.
        phantom = PhantomJSwrapper(self, required_version='2.0')
        webpage, _ = phantom.get(page_url, webpage, video_id=video_id)

        # Extract the player data...
        clappr_dict = self._find_clappr_data(webpage, video_id)
        if not clappr_dict:
            raise ExtractorError('Player data not found',
                                 expected=False, video_id=video_id)

        # ...and parse it.
        info_dict = self._parse_clappr_data(clappr_dict,
                                            video_id=video_id, base_url=page_url)

        # Prefer the title taken from the non-embed page; otherwise use the
        # <h1> heading of the rendered page.
        info_dict['title'] = title or self._html_search_regex(
            r'<h1[^>]*?>(?P<title>.+?)\s*</h1>', webpage, 'title')

        return info_dict
|
Loading…
x
Reference in New Issue
Block a user