2015-07-13 07:43:48 -05:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
import re
|
|
|
|
from ..utils import (
|
|
|
|
ExtractorError,
|
|
|
|
unescapeHTML,
|
|
|
|
js_to_json,
|
|
|
|
)
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
|
|
|
|
|
|
|
class MemriIE(InfoExtractor):
    """Extractor for clips hosted on memri.org / memritv.org.

    Two URL shapes are accepted by ``_VALID_URL``:

    * clip pages such as ``/clip/en/4496.htm`` (numeric id captured as
      ``id``), and
    * any URL carrying a ``clip_id=<digits>`` parameter (captured as
      ``eid``), e.g. the embedded player.
    """

    # The dot before "org" is escaped so that only a literal "." matches.
    _VALID_URL = r'https?://(?:www\.)?memri(?:tv)?\.org/(?:clip(?:/[^/]+)*/(?P<id>\d+)\.html?|.+clip_id=(?P<eid>\d+))'
    IE_NAME = 'memri'
    _TESTS = [{
        'url': 'http://www.memritv.org/clip/en/4496.htm',
        'info_dict': {
            'id': '4496',
            'ext': 'mp4',
            'title': 'Takfiri, The Caliph\'s Favorite Cheese - Anti-ISIS Iraqi Satire',
            'uploader': 'Memri',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the clip referenced by *url*.

        The embedded player page is downloaded, its ``config_overrides``
        JavaScript object is converted to JSON, and one format is produced
        per usable entry of ``media.source``.

        Raises:
            ExtractorError: if no format could be built from the player
                configuration.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id') or mobj.group('eid')

        # Clip pages are swapped for the embedded player URL; URLs that
        # already carry clip_id are fetched unchanged.
        rurl = url
        if mobj.group('eid') is None:
            rurl = 'http://www.memritv.org/embedded_player/index.php?clip_id=' + video_id

        webpage = self._download_webpage(rurl, video_id)
        jstr = self._search_regex(r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL)
        # // comments break js_to_json.  The trailing newline is deliberately
        # not consumed, so runs of consecutive comment lines are all removed.
        jstr = re.sub(r'\n\s*//[^\n]*', '', jstr)
        js = self._parse_json(jstr, 'json', transform_source=js_to_json)

        media = js.get('media') or {}
        formats = []
        for ent in media.get('source') or []:
            eurl = ent.get('src')
            if not eurl:
                # Nothing to download for an entry without a source URL.
                continue

            if ent.get('type', '') == 'application/x-mpegURL':
                formats.extend(self._extract_m3u8_formats(
                    eurl, video_id, entry_protocol='m3u8', ext='mp4',
                    m3u8_id='m3u8-mp4',
                    preference=0)
                )
                continue

            proto_match = re.search(r'^(.+?)://', eurl)
            if proto_match is None:
                # Scheme-less URL: the protocol cannot be determined.
                continue
            proto = proto_match.group(1)

            fmt = {
                'url': eurl,
                'ext': 'mp4',
                'protocol': proto,
                'format_id': proto + '-mp4',
            }
            if proto == 'rtmp':
                # Split "<base>mp4:<path><rest>" into the connection URL
                # (<base><rest>) and the play path (mp4:<path>).  <rest> may
                # be empty, so a URL without a query string keeps its full
                # play path.
                urlre = re.search(r'^(.+?)(mp4:[^\?]+)(.*)', eurl)
                if urlre is not None:
                    fmt['url'] = urlre.group(1) + urlre.group(3)
                    fmt['play_path'] = urlre.group(2)
            formats.append(fmt)

        if not formats:
            # In verbose mode the parsed configuration is included to make
            # the failure easier to diagnose.
            if self._downloader.params.get('verbose', False):
                raise ExtractorError('No video found in ' + jstr + '\n')
            else:
                raise ExtractorError('No video found')

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': unescapeHTML(media['title']),
            'uploader': 'Memri',
            'formats': formats,
        }
|