From 073741b132a1c2a67e7580a4df971938e47c7bcc Mon Sep 17 00:00:00 2001 From: fnord Date: Tue, 23 Jun 2015 02:13:26 -0500 Subject: [PATCH] Generic/VimeoIE: Return a playlist of all vimeo iframes/embeds (try2) Vimeo extraction logic is restricted to the first video only, even if a page has multiple videos: youtube-dl http://www.theguardian.com/environment/2015/jun/10/climate-change-has-left-us-exposed-in-arctic-say-military-experts --get-title WARNING: Falling back on generic information extractor. The Climate 25: Sherri Goodman After patch: youtube-dl http://www.theguardian.com/environment/2015/jun/10/climate-change-has-left-us-exposed-in-arctic-say-military-experts --get-title WARNING: Falling back on generic information extractor. The Climate 25: Sherri Goodman The Climate 25: David Titley The Climate 25: Steve Cheney The Climate 25: General Charles Jacoby The Climate 25: Henry Paulson --- youtube_dl/extractor/generic.py | 6 +++--- youtube_dl/extractor/vimeo.py | 18 +++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c03fddc6..da493b7ec 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1124,9 +1124,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, ie='RtlNl') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url is not None: - return self.url_result(vimeo_url) + vimeo_urls = VimeoIE._extract_vimeo_urls(url, webpage) + if vimeo_urls is not None: + return _playlist_from_matches(vimeo_urls, ie='Vimeo') # Look for embedded YouTube player matches = re.findall(r'''(?x) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cae90205d..63cbb3e66 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -175,19 +175,15 @@ class VimeoIE(VimeoBaseInfoExtractor): ] @staticmethod - def _extract_vimeo_url(url, webpage): + def 
_extract_vimeo_urls(url, webpage): # Look for embedded (iframe) Vimeo player - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) + mobj = re.findall( + r'<(?:iframe|embed)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:player\.vimeo\.com/video/|(?:www\.)?vimeo\.com/moogaloop\.swf).+?)\1', webpage) if mobj: - player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'Referer': url}) - return surl - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return mobj.group(1) + return map( + lambda m: smuggle_url(unescapeHTML(m[1]), {'Referer': url}), + mobj + ) def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None)