From ab72d143071cc253224b6e35962edb1dc9a34939 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Fri, 7 Apr 2017 23:23:31 -0400 Subject: [PATCH] [kaltura] Support iframe embeds, with test Note that these need to go back through the Generic extractor because the iframe URLs may be redirects that cannot be parsed by KalturaIE without being followed, and Generic checks for such redirects and follows them. Hence dropping the IE from url_result(). --- youtube_dl/extractor/generic.py | 17 ++++++++++++++++- youtube_dl/extractor/kaltura.py | 7 +++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 73911940c..4bfa3f8a1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1080,6 +1080,21 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # Kaltura iframe embed + 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/', + 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44', + 'info_dict': { + 'id': '0_f2cfbpwy', + 'ext': 'mp4', + 'title': 'I. M. 
Pei: A Centennial Celebration', + 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c', + 'upload_date': '20170403', + 'uploader_id': 'batchUser', + 'timestamp': 1491232186, + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -2290,7 +2305,7 @@ class GenericIE(InfoExtractor): # Look for Kaltura embeds kaltura_url = KalturaIE._extract_url(webpage) if kaltura_url: - return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) + return self.url_result(smuggle_url(kaltura_url, {'source_url': url})) # Look for Eagle.Platform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 54374ea76..f1e8b25cc 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -139,6 +139,13 @@ class KalturaIE(InfoExtractor): url = smuggle_url(url, {'service_url': service_url.group(1)}) return url + # Check for an iframe, which may require redirection. + mobj = re.search( + r"<iframe[^>]+src=['\"](?P<url>(https?:)?//www\.kaltura\.com/[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] if len(actions) > 1: