From 4905e589d74e72fccb6e050d4e0db0d96c8e3930 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sat, 8 Apr 2017 04:21:52 -0400 Subject: [PATCH] [kaltura] Be rigorous on iframe Per @dstftw, don't pull out just any kaltura.com iframes, make sure they have /p/{PARTNER_ID} and &entry_id={ENTRY_ID} and return a kaltura: URL. Go back to specifying the IE is Kaltura in url_result(). --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/kaltura.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4bfa3f8a1..658533cf6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2305,7 +2305,7 @@ class GenericIE(InfoExtractor): # Look for Kaltura embeds kaltura_url = KalturaIE._extract_url(webpage) if kaltura_url: - return self.url_result(smuggle_url(kaltura_url, {'source_url': url})) + return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) # Look for Eagle.Platform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 741dd8dc5..6e992ee4b 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -128,7 +128,17 @@ class KalturaIE(InfoExtractor): (?P<q2>["\'])entry_?[Ii]d(?P=q2) )\s*:\s* (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) - ''', webpage)) + ''', webpage) or + re.search( + # + r'''(?xs) + (?P<q1>["\']) + (?:https?:)?//(?:www\.)?kaltura\.com/p/(?P<partner_id>\d+)/ + (?:(?!(?P=q1)).)* + [\?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) + (?P=q1) + ''', webpage) + ) if mobj: embed_info = mobj.groupdict() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info @@ -140,13 +150,6 @@ class KalturaIE(InfoExtractor): url = smuggle_url(url, {'service_url': service_url.group(1)}) return url - # Check for an iframe, which may require redirection.
- mobj = re.search( - r"<iframe[^>]+src=['\"](?P<url>(https?:)?//www\.kaltura\.com/[^'\"]+)['\"]", - webpage) - if mobj: - return mobj.group('url') - def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] if len(actions) > 1: