2018-07-12 17:42:47 +02:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2018-07-12 17:55:19 +02:00
|
|
|
from ..utils import ExtractorError
|
|
|
|
from .common import InfoExtractor
|
2018-07-12 17:42:47 +02:00
|
|
|
from .openload import PhantomJSwrapper
|
|
|
|
|
|
|
|
|
|
|
|
class VidloxIE(InfoExtractor):
    """Information extractor for vidlox.me / vidlox.tv video pages.

    Both the plain page form (``/<id>``) and the embed form
    (``/embed-<id>.html``) are matched by ``_VALID_URL``.
    """

    _VALID_URL = r'https?://(?:www\.)?vidlox\.(?:me|tv)/(?:embed-)?(?P<id>[0-9a-z]+)(?:\.html)?'

    _TESTS = [{
        'url': 'https://vidlox.me/5tq733o3wj1d',
        'md5': 'f780592146ad0458679064de891f3e3f',
        'info_dict': {
            'id': '5tq733o3wj1d',
            'ext': 'mp4',
            'title': r're:big buck bunny 1080p surround',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': {
                'Spanish': [{
                    'ext': 'srt',
                }],
            }
        }
    }, {
        'url': 'https://vidlox.me/embed-bs2nk6dgqio1.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Steps, in order:
          1. Fetch the plain ``https://vidlox.me/<id>`` page and strip
             newlines and tabs from it.
          2. Abort with an expected ``ExtractorError`` when the page
             contains 'File not found'.
          3. If the page says the video is embed-only, take the title from
             the ``<title>`` tag (dropping the first 'Watch ' occurrence)
             and switch to the embed URL with no pre-downloaded HTML.
          4. Pass the URL (and HTML, if any) to the PhantomJS wrapper.
          5. Locate and parse the Clappr player data; when no title was
             taken in step 3, read it from the ``<h1>`` tag instead.

        Raises ``ExtractorError`` when the file is reported missing or
        when no Clappr data is found in the resulting page.
        """
        video_id = self._match_id(url)
        target_url = 'https://vidlox.me/%s' % video_id

        # The wrapper is created before any network request, as in the
        # original flow.
        js_runner = PhantomJSwrapper(self, required_version='2.0')

        # Fetch the regular page first; a couple of plain substring checks
        # are run against it below.
        raw_page = self._download_webpage(target_url, video_id)
        html = raw_page.replace('\n', '').replace('\t', '')

        if 'File not found' in html:
            raise ExtractorError('File not found', expected=True, video_id=video_id)

        if 'This video can be watched as embed only.' in html:
            # Embed-only video: keep the title from this page, then point
            # the PhantomJS step at the embed URL without any HTML.
            page_title = self._html_search_regex(
                r'<title[^>]*?>(?P<title>.+?)\s*</title>', html, 'title')
            title = page_title.replace('Watch ', '', 1)
            html = None
            target_url = 'https://vidlox.me/embed-%s.html' % video_id
        else:
            title = None

        # Execute the page's JS through PhantomJS; the second element of
        # the returned pair is not used.
        html, _ = js_runner.get(target_url, html, video_id=video_id)

        # Pull the player data out of the processed page ...
        player_data = self._find_clappr_data(html, video_id)
        if not player_data:
            raise ExtractorError('Clappr data not found', expected=False, video_id=video_id)

        # ... and turn it into the result dict.
        result = self._parse_clappr_data(player_data, video_id=video_id, base_url=target_url)

        if not title:
            # No (non-empty) title from the embed-only branch: use <h1>.
            title = self._html_search_regex(
                r'<h1[^>]*?>(?P<title>.+?)\s*</h1>', html, 'title')
        result['title'] = title

        return result
|