From 073741b132a1c2a67e7580a4df971938e47c7bcc Mon Sep 17 00:00:00 2001 From: fnord Date: Tue, 23 Jun 2015 02:13:26 -0500 Subject: [PATCH 1/2] Generic/VimeoIE: Return a playlist of all vimeo iframes/embeds (try2) Vimeo extraction logic is restricted to the first video only, even if a page has multiple videos: youtube-dl http://www.theguardian.com/environment/2015/jun/10/climate-change-has-left-us-exposed-in-arctic-say-military-experts --get-title WARNING: Falling back on generic information extractor. The Climate 25: Sherri Goodman After patch: youtube-dl http://www.theguardian.com/environment/2015/jun/10/climate-change-has-left-us-exposed-in-arctic-say-military-experts --get-title WARNING: Falling back on generic information extractor. The Climate 25: Sherri Goodman The Climate 25: David Titley The Climate 25: Steve Cheney The Climate 25: General Charles Jacoby The Climate 25: Henry Paulson --- youtube_dl/extractor/generic.py | 6 +++--- youtube_dl/extractor/vimeo.py | 18 +++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c03fddc6..da493b7ec 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1124,9 +1124,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, ie='RtlNl') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url is not None: - return self.url_result(vimeo_url) + vimeo_urls = VimeoIE._extract_vimeo_urls(url, webpage) + if vimeo_urls is not None: + return _playlist_from_matches(vimeo_urls, ie='Vimeo') # Look for embedded YouTube player matches = re.findall(r'''(?x) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cae90205d..63cbb3e66 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -175,19 +175,15 @@ class VimeoIE(VimeoBaseInfoExtractor): ] @staticmethod - def _extract_vimeo_url(url, webpage): + def 
_extract_vimeo_urls(url, webpage): # Look for embedded (iframe) Vimeo player - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) + mobj = re.findall( + r'<(?:iframe|embed)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:player\.vimeo\.com/video/|(?:www\.)?vimeo\.com/moogaloop\.swf).+?)\1', webpage) if mobj: - player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'Referer': url}) - return surl - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return mobj.group(1) + return map( + lambda m: smuggle_url(unescapeHTML(m[1]), {'Referer': url}), + mobj + ) def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) From 91925d37e80e17d19a183e614728764b20f5f892 Mon Sep 17 00:00:00 2001 From: fnord Date: Tue, 23 Jun 2015 09:02:08 -0500 Subject: [PATCH 2/2] unbreak tumblr-vimeo (c5895d5) --- youtube_dl/extractor/tumblr.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 9ead13a91..37935e935 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -72,9 +72,14 @@ class TumblrIE(InfoExtractor): if pornhub_url: return self.url_result(pornhub_url, 'PornHub') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url: - return self.url_result(vimeo_url, 'Vimeo') + vimeo_urls = VimeoIE._extract_vimeo_urls(url, webpage) + if vimeo_urls: + entries = [{ + '_type': 'url', + 'url': vurl, + 'ie_key': 'Vimeo' + } for vurl in vimeo_urls] + return self.playlist_result(entries) iframe_url = self._search_regex( r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',