reuters: fix syntax
This commit is contained in:
parent
1ee382603f
commit
d4ebd851ef
@ -4,14 +4,12 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
ExtractorError,
|
||||||
strip_jsonp,
|
|
||||||
js_to_json,
|
js_to_json,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
import re
|
|
||||||
from pprint import pprint
|
|
||||||
|
|
||||||
class YospaceIE(InfoExtractor):
|
class YospaceIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:csm-[a-z]|mas-[a-z]).cds\d+.yospace.com/(?P<type>csm|mas)/(?P<id>\d+/\d+)'
|
_VALID_URL = r'http://(?:csm-[a-z]|mas-[a-z]).cds\d+.yospace.com/(?P<type>csm|mas)/(?P<id>\d+/\d+)'
|
||||||
@ -64,7 +62,6 @@ class YospaceIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
url_type = mobj.group('type')
|
url_type = mobj.group('type')
|
||||||
display_id = url_type
|
display_id = url_type
|
||||||
title = display_id
|
|
||||||
formats = []
|
formats = []
|
||||||
hls_url = None
|
hls_url = None
|
||||||
|
|
||||||
@ -77,7 +74,6 @@ class YospaceIE(InfoExtractor):
|
|||||||
if hls_url is not None:
|
if hls_url is not None:
|
||||||
formats.extend(self._extract_m3u8(hls_url))
|
formats.extend(self._extract_m3u8(hls_url))
|
||||||
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -87,6 +83,7 @@ class YospaceIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ReutersIE(YospaceIE):
|
class ReutersIE(YospaceIE):
|
||||||
_VALID_URL = r'http://(?:www\.)?reuters.com/.*?(?P<id>[^/]+)$'
|
_VALID_URL = r'http://(?:www\.)?reuters.com/.*?(?P<id>[^/]+)$'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -123,15 +120,17 @@ class ReutersIE(YospaceIE):
|
|||||||
javascript_chunks = re.findall(r'<script[^>]+text/javascript[^>]*>(.*?)</script>', webpage, re.DOTALL)
|
javascript_chunks = re.findall(r'<script[^>]+text/javascript[^>]*>(.*?)</script>', webpage, re.DOTALL)
|
||||||
if not javascript_chunks:
|
if not javascript_chunks:
|
||||||
return
|
return
|
||||||
|
|
||||||
def msub(m):
|
def msub(m):
|
||||||
s = m.group(1)
|
s = m.group(1)
|
||||||
if rdata.get(s):
|
if rdata.get(s):
|
||||||
s = rdata.get(s)
|
s = rdata.get(s)
|
||||||
return ': "' + s + '",\n'
|
return ': "' + s + '",\n'
|
||||||
return ': False,\n'
|
return ': False,\n'
|
||||||
|
|
||||||
vidnum = 0
|
vidnum = 0
|
||||||
for innerhtml in javascript_chunks:
|
for innerhtml in javascript_chunks:
|
||||||
drawplayer_js = re.search(r'Reuters.yovideo.drawPlayer\((\{[^\}]+://.+?\})\);',innerhtml,re.DOTALL);
|
drawplayer_js = re.search(r'Reuters.yovideo.drawPlayer\((\{[^\}]+://.+?\})\);', innerhtml, re.DOTALL)
|
||||||
if drawplayer_js:
|
if drawplayer_js:
|
||||||
vidnum += 1
|
vidnum += 1
|
||||||
drawplayer_js = re.sub(r'".+?"\s*:\s*[^\d"\'].+?,\n', '', drawplayer_js.group(1))
|
drawplayer_js = re.sub(r'".+?"\s*:\s*[^\d"\'].+?,\n', '', drawplayer_js.group(1))
|
||||||
@ -145,12 +144,12 @@ class ReutersIE(YospaceIE):
|
|||||||
if re.search(r'^\s*Reuters\.([^\s\[\]\.]+\.[^\[\]]+?)\s*=\s*[\'\"\d].+?;\s*\n', innerhtml, re.M):
|
if re.search(r'^\s*Reuters\.([^\s\[\]\.]+\.[^\[\]]+?)\s*=\s*[\'\"\d].+?;\s*\n', innerhtml, re.M):
|
||||||
js_vars = re.findall(r'^\s*Reuters\.([^\s\[\]\.]+\.[^\s\[\]\.]+)\s*=\s*[\'"]?(.*?)[\'"]?;\s*\n', innerhtml, re.M)
|
js_vars = re.findall(r'^\s*Reuters\.([^\s\[\]\.]+\.[^\s\[\]\.]+)\s*=\s*[\'"]?(.*?)[\'"]?;\s*\n', innerhtml, re.M)
|
||||||
for ent in js_vars:
|
for ent in js_vars:
|
||||||
if re.search(r'["\'].+?[\(\)\+]',ent[1]):
|
|
||||||
continue
|
|
||||||
if not ent[1]:
|
if not ent[1]:
|
||||||
continue
|
continue
|
||||||
|
if re.search(r'["\'].+?[\(\)\+]', ent[1]):
|
||||||
|
continue
|
||||||
rdata[ent[0]] = ent[1]
|
rdata[ent[0]] = ent[1]
|
||||||
drawplayer_js = re.search(r'Reuters.yovideo.drawPlayer\((\{.+?\})\);',innerhtml,re.DOTALL);
|
drawplayer_js = re.search(r'Reuters.yovideo.drawPlayer\((\{.+?\})\);', innerhtml, re.DOTALL)
|
||||||
if drawplayer_js:
|
if drawplayer_js:
|
||||||
vidnum += 1
|
vidnum += 1
|
||||||
ds = drawplayer_js.group(1)
|
ds = drawplayer_js.group(1)
|
||||||
@ -163,7 +162,6 @@ class ReutersIE(YospaceIE):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
from .yospace import YospaceIE
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
ret = []
|
ret = []
|
||||||
@ -186,7 +184,7 @@ class ReutersIE(YospaceIE):
|
|||||||
if yo_id_str:
|
if yo_id_str:
|
||||||
yo_id = yo_id_str.group(1) + '/' + yo_id_str.group(2)
|
yo_id = yo_id_str.group(1) + '/' + yo_id_str.group(2)
|
||||||
murl = 'http://mas-e.cds1.yospace.com/mas/' + yo_id + '?trans=json'
|
murl = 'http://mas-e.cds1.yospace.com/mas/' + yo_id + '?trans=json'
|
||||||
yurl = 'http://csm-e.cds1.yospace.com/csm/'+yo_id
|
# yurl = 'http://csm-e.cds1.yospace.com/csm/'+yo_id
|
||||||
formats.extend(self._extract_formats(murl, video_id))
|
formats.extend(self._extract_formats(murl, video_id))
|
||||||
if formats:
|
if formats:
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -202,5 +200,3 @@ class ReutersIE(YospaceIE):
|
|||||||
if len(ret) > 1:
|
if len(ret) > 1:
|
||||||
return self.playlist_result(ret, video_id, 'reuters')
|
return self.playlist_result(ret, video_id, 'reuters')
|
||||||
return ret[0]
|
return ret[0]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user