From fda6d237a5b664cc8a9a45562d4113c51fd0280d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Feb 2020 06:47:11 +0700 Subject: [PATCH 1/2] [wistia] Add support for multiple generic embeds (closes #8347, closes #11385) --- youtube_dl/extractor/generic.py | 17 +++++++++-------- youtube_dl/extractor/wistia.py | 31 ++++++++++++++++--------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3c002472f..04c026984 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2537,14 +2537,15 @@ class GenericIE(InfoExtractor): dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) # Look for embedded Wistia player - wistia_url = WistiaIE._extract_url(webpage) - if wistia_url: - return { - '_type': 'url_transparent', - 'url': self._proto_relative_url(wistia_url), - 'ie_key': WistiaIE.ie_key(), - 'uploader': video_uploader, - } + wistia_urls = WistiaIE._extract_urls(webpage) + if wistia_urls: + playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key()) + for entry in playlist['entries']: + entry.update({ + '_type': 'url_transparent', + 'uploader': video_uploader, + }) + return playlist # Look for SVT player svt_url = SVTIE._extract_url(webpage) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 085514d47..168e5e901 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor): # https://wistia.com/support/embed-and-share/video-on-your-website @staticmethod def _extract_url(webpage): - match = re.search( - r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage) - if match: - return unescapeHTML(match.group('url')) + urls = WistiaIE._extract_urls(webpage) + return urls[0] if urls else None - 
match = re.search( - r'''(?sx) - <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? - <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2 - ''', webpage) - if match: - return 'wistia:%s' % match.group('id') - - match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage) - if match: - return 'wistia:%s' % match.group('id') + @staticmethod + def _extract_urls(webpage): + urls = [] + for match in re.finditer( + r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage): + urls.append(unescapeHTML(match.group('url'))) + for match in re.finditer( + r'''(?sx) + <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2 + ''', webpage): + urls.append('wistia:%s' % match.group('id')) + for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage): + urls.append('wistia:%s' % match.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) From 00d798b7c25f0a03adb252c882df46abc8c23b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Feb 2020 06:49:45 +0700 Subject: [PATCH 2/2] [teachable] Add support for multiple videos per lecture (closes #24101) --- youtube_dl/extractor/teachable.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 6b7f13b43..cca89a4a8 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -160,8 +160,8 @@ class TeachableIE(TeachableBaseIE): webpage = self._download_webpage(url, video_id) - wistia_url = WistiaIE._extract_url(webpage) - if not wistia_url: + wistia_urls = WistiaIE._extract_urls(webpage) + if not wistia_urls: if any(re.search(p, webpage) for p in ( r'class=["\']lecture-contents-locked', r'>\s*Lecture contents locked', @@ 
-174,12 +174,14 @@ class TeachableIE(TeachableBaseIE): title = self._og_search_title(webpage, default=None) - return { + entries = [{ '_type': 'url_transparent', 'url': wistia_url, 'ie_key': WistiaIE.ie_key(), 'title': title, - } + } for wistia_url in wistia_urls] + + return self.playlist_result(entries, video_id, title) class TeachableCourseIE(TeachableBaseIE):