diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bbdb4a2fe..949c0c15b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -873,6 +873,7 @@ from .rutube import ( RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, + RutubePlaylistIE, ) from .rutv import RUTVIE from .ruutu import RuutuIE diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index a0c84133b..72a44c0a4 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -193,3 +193,59 @@ class RutubePersonIE(RutubeChannelIE): }] _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + +class RutubePlaylistIE(InfoExtractor): + IE_NAME = 'rutube:playlist' + IE_DESC = 'Rutube playlists' + _TESTS = [{ + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source' + 'info_dict': { + 'id': '4252', + }, + 'playlist_count': 25, + }] + + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P[\da-z]{32})/.+pl_id=(?P\d+).*$' + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s' + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + playlist_id = m.group('pl_id') + return self._extract_playlist(playlist_id) + + def _extract_playlist(self, playlist_id): + entries = [] + for pagenum in itertools.count(1): + page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum) + + # download_json will sent an accept: application/xml header + headers = {'Accept': 'application/json'} + page = self._download_json(page_url, playlist_id, + "Downloading metadata for page %s" % + pagenum, headers=headers) + + if not page['results']: + break + + results = page['results'] + for result in results: + entry = self.url_result(result['video_url'], 'Rutube') + entry['id'] = result['id'] + entry['uploader'] = result['author']['name'] + entry['uploader_id'] = result['author']['id'] + entry['upload_date'] = unified_strdate(result['created_ts']) + entry['title'] = result['title'] + entry['description'] = result['description'] + entry['thumbnail'] = result['thumbnail_url'] + entry['duration'] = result['duration'] + entry['category'] = result['category']['name'] + entry['age_limit'] = 18 if result['is_adult'] else 0 + entry['view_count'] = result['hits'] + entry['is_live'] = result['is_livestream'] + entry['webpage_url'] = result['video_url'] + entries.append(entry) + + if page['has_next'] is False: + break + + return self.playlist_result(entries, playlist_id, page['name'])