Merge branch 'master' into BlenderCloud-issue-13282

This commit is contained in:
Parmjit Virk 2017-07-01 20:55:09 -05:00
commit 4d86d8b7aa
3 changed files with 36 additions and 16 deletions

View File

@@ -977,6 +977,7 @@ from .tagesschau import (
TagesschauIE, TagesschauIE,
) )
from .tass import TassIE from .tass import TassIE
from .tastytrade import TastyTradeIE
from .tbs import TBSIE from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE from .tdslifeway import TDSLifewayIE
from .teachertube import ( from .teachertube import (

View File

@@ -6,7 +6,10 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import int_or_none from ..utils import (
int_or_none,
try_get,
)
class TEDIE(InfoExtractor): class TEDIE(InfoExtractor):
@@ -113,8 +116,9 @@ class TEDIE(InfoExtractor):
} }
def _extract_info(self, webpage): def _extract_info(self, webpage):
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', info_json = self._search_regex(
webpage, 'info json') r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
webpage, 'info json')
return json.loads(info_json) return json.loads(info_json)
def _real_extract(self, url): def _real_extract(self, url):
@@ -136,11 +140,16 @@ class TEDIE(InfoExtractor):
webpage = self._download_webpage(url, name, webpage = self._download_webpage(url, name,
'Downloading playlist webpage') 'Downloading playlist webpage')
info = self._extract_info(webpage) info = self._extract_info(webpage)
playlist_info = info['playlist']
playlist_info = try_get(
info, lambda x: x['__INITIAL_DATA__']['playlist'],
dict) or info['playlist']
playlist_entries = [ playlist_entries = [
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key()) self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
for talk in info['talks'] for talk in try_get(
info, lambda x: x['__INITIAL_DATA__']['talks'],
dict) or info['talks']
] ]
return self.playlist_result( return self.playlist_result(
playlist_entries, playlist_entries,
@@ -149,9 +158,14 @@ class TEDIE(InfoExtractor):
def _talk_info(self, url, video_name): def _talk_info(self, url, video_name):
webpage = self._download_webpage(url, video_name) webpage = self._download_webpage(url, video_name)
self.report_extraction(video_name)
talk_info = self._extract_info(webpage)['talks'][0] info = self._extract_info(webpage)
talk_info = try_get(
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
dict) or info['talks'][0]
title = talk_info['title'].strip()
external = talk_info.get('external') external = talk_info.get('external')
if external: if external:
@@ -165,19 +179,27 @@ class TEDIE(InfoExtractor):
'url': ext_url or external['uri'], 'url': ext_url or external['uri'],
} }
native_downloads = try_get(
talk_info, lambda x: x['downloads']['nativeDownloads'],
dict) or talk_info['nativeDownloads']
formats = [{ formats = [{
'url': format_url, 'url': format_url,
'format_id': format_id, 'format_id': format_id,
'format': format_id, 'format': format_id,
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None] } for (format_id, format_url) in native_downloads.items() if format_url is not None]
if formats: if formats:
for f in formats: for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id']) finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo: if finfo:
f.update(finfo) f.update(finfo)
player_talk = talk_info['player_talks'][0]
resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None http_url = None
for format_id, resources in talk_info['resources'].items(): for format_id, resources in resources_.items():
if format_id == 'h264': if format_id == 'h264':
for resource in resources: for resource in resources:
h264_url = resource.get('file') h264_url = resource.get('file')
@@ -237,14 +259,11 @@ class TEDIE(InfoExtractor):
video_id = compat_str(talk_info['id']) video_id = compat_str(talk_info['id'])
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
thumbnail = 'http://' + thumbnail
return { return {
'id': video_id, 'id': video_id,
'title': talk_info['title'].strip(), 'title': title,
'uploader': talk_info['speaker'], 'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
'thumbnail': thumbnail, 'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'subtitles': self._get_subtitles(video_id, talk_info), 'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats, 'formats': formats,

View File

@@ -157,7 +157,7 @@ class XFileShareIE(InfoExtractor):
def extract_formats(default=NO_DEFAULT): def extract_formats(default=NO_DEFAULT):
urls = [] urls = []
for regex in ( for regex in (
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1', r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)', r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'): r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):