From a58f5b9c8391bc2363a132d83f3f122966c9d2f8 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Tue, 13 Mar 2018 16:39:30 +0100 Subject: [PATCH 1/3] [vimeo] fix album extraction (issue #15704) --- youtube_dl/extractor/vimeo.py | 39 +++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 08257147e..4cec26f58 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -27,6 +27,7 @@ from ..utils import ( urlencode_postdata, unescapeHTML, parse_filesize, + xpath_text, ) @@ -833,9 +834,16 @@ class VimeoAlbumIE(VimeoChannelIE): 'url': 'https://vimeo.com/album/2632481', 'info_dict': { 'id': '2632481', - 'title': 'Staff Favorites: November 2013', + 'title': 'Vimeo / Staff Favorites: November 2013', }, 'playlist_mincount': 13, + }, { + 'url': 'https://vimeo.com/album/4786409', + 'info_dict': { + 'id': '4786409', + 'title': 'Vimeo / NSSpain 2017', + }, + 'playlist_mincount': 25, }, { 'note': 'Password-protected album', 'url': 'https://vimeo.com/album/3253534', @@ -861,7 +869,34 @@ class VimeoAlbumIE(VimeoChannelIE): def _real_extract(self, url): album_id = self._match_id(url) - return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id) + rss_url = url + '/rss' + + doc = self._download_xml(rss_url, album_id, fatal=True) + + playlist_title = doc.find('./channel/title').text + playlist_desc_el = doc.find('./channel/description') + playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text + + entries = [] + for it in doc.findall('./channel/item'): + next_title = it.find('title').text + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + continue + + entries.append({ + '_type': 'url_transparent', + 'url': next_url, + 'title': next_title, + }) + + return { + '_type': 'playlist', + 'id': album_id, + 'title': playlist_title, + 'description': playlist_desc, + 'entries': entries, + } class 
VimeoGroupsIE(VimeoAlbumIE): From 02112b73bef144e64f84772b3929ca03f5ecfbc8 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Mon, 30 Apr 2018 22:58:58 +0200 Subject: [PATCH 2/3] [vimeo] incorporate review by dstftw on PR #15855 --- youtube_dl/extractor/vimeo.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4cec26f58..509c5c9b5 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -834,14 +834,14 @@ class VimeoAlbumIE(VimeoChannelIE): 'url': 'https://vimeo.com/album/2632481', 'info_dict': { 'id': '2632481', - 'title': 'Vimeo / Staff Favorites: November 2013', + 'title': 'Staff Favorites: November 2013', }, 'playlist_mincount': 13, }, { 'url': 'https://vimeo.com/album/4786409', 'info_dict': { 'id': '4786409', - 'title': 'Vimeo / NSSpain 2017', + 'title': 'NSSpain 2017', }, 'playlist_mincount': 25, }, { @@ -871,16 +871,19 @@ class VimeoAlbumIE(VimeoChannelIE): album_id = self._match_id(url) rss_url = url + '/rss' - doc = self._download_xml(rss_url, album_id, fatal=True) + doc = self._download_xml(rss_url, album_id, fatal=False) playlist_title = doc.find('./channel/title').text + re_clean_title = re.compile('(?:Vimeo / )(.*)') + playlist_title = re_clean_title.findall(playlist_title)[0] + playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text entries = [] for it in doc.findall('./channel/item'): next_title = it.find('title').text - next_url = xpath_text(it, 'link', fatal=False) + next_url = xpath_text(it, 'link') if not next_url: continue From 6cf55c85dd1ffe25486a25820b5cc8178bf494bc Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Tue, 1 May 2018 00:10:59 +0200 Subject: [PATCH 3/3] [vimeo] clean url when necessary (PR #15855), add tests --- youtube_dl/extractor/vimeo.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 509c5c9b5..b498cd2d5 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -837,6 +837,20 @@ class VimeoAlbumIE(VimeoChannelIE): 'title': 'Staff Favorites: November 2013', }, 'playlist_mincount': 13, + }, { + 'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail', + 'info_dict': { + 'id': '2632481', + 'title': 'Staff Favorites: November 2013', + }, + 'playlist_mincount': 13, + }, { + 'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail', + 'info_dict': { + 'id': '2632481', + 'title': 'Staff Favorites: November 2013', + }, + 'playlist_mincount': 13, }, { 'url': 'https://vimeo.com/album/4786409', 'info_dict': { @@ -869,7 +883,12 @@ class VimeoAlbumIE(VimeoChannelIE): def _real_extract(self, url): album_id = self._match_id(url) - rss_url = url + '/rss' + + # we only want the base url with the id, excluding possibly appended + # options like e.g 'sort:plays'. + re_clean_url = re.compile(r'https://vimeo\.com/album/\d+') + clean_url = re_clean_url.findall(url)[0] + rss_url = clean_url + '/rss' doc = self._download_xml(rss_url, album_id, fatal=False)