From ad7f5cb12eb7796b010f5abad37b8f9656cccf87 Mon Sep 17 00:00:00 2001
From: 0x9fff00 <0x9fff00+git@protonmail.ch>
Date: Sun, 10 Feb 2019 00:06:15 +0100
Subject: [PATCH] [svtplay] Extract more metadata and higher resolution thumbnail (closes #18159)

Move the metadata fields (title, duration, age_limit, series,
season_number, episode, episode_number) out of
SVTBaseIE._extract_video into a new SVTBaseIE._extract_metadata helper.
The helper additionally fills in thumbnail (with the '{format}'
placeholder replaced by 'extralarge'), description and timestamp, and
combines everything with the caller-supplied dict via merge_dicts.

SVTPlayIE._real_extract no longer returns early after the embedded-data
path: it falls back to _extract_by_video_id only when no info dict was
produced, passes data['videoPage']['video'] through _extract_metadata
when that dict is present, and calls _og_search_thumbnail only if no
thumbnail has been set by then.
---
 youtube_dl/extractor/svt.py | 108 ++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 34 deletions(-)

diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index e12389cad..86029e128 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -9,15 +9,53 @@ from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
+    merge_dicts,
     str_or_none,
     strip_or_none,
     try_get,
+    unified_timestamp,
 )
 
 
 class SVTBaseIE(InfoExtractor):
     _GEO_COUNTRIES = ['SE']
 
+    def _extract_metadata(self, info, video_info):
+        title = video_info.get('title')
+
+        series = video_info.get('programTitle')
+        season_number = int_or_none(video_info.get('season'))
+        episode = video_info.get('episodeTitle')
+        episode_number = int_or_none(video_info.get('episodeNumber'))
+
+        thumbnail = video_info.get('thumbnail')
+        if thumbnail:
+            thumbnail = thumbnail.replace('{format}', 'extralarge')
+        description = video_info.get('description')
+        timestamp = unified_timestamp(try_get(video_info, (
+            lambda x: x['validFrom'], lambda x: x['rights']['validFrom'])))
+        duration = int_or_none(
+            dict_get(video_info, ('materialLength', 'contentDuration')))
+        age_limit = None
+        adult = dict_get(
+            video_info, ('inappropriateForChildren', 'blockedForChildren'),
+            skip_false_values=False)
+        if adult is not None:
+            age_limit = 18 if adult else 0
+
+        return merge_dicts(info, {
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'timestamp': timestamp,
+            'duration': duration,
+            'age_limit': age_limit,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+        })
+
     def _extract_video(self, video_info, video_id):
         is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
         m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
@@ -63,34 +101,12 @@ class SVTBaseIE(InfoExtractor):
 
                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
 
-        title = video_info.get('title')
-
-        series = video_info.get('programTitle')
-        season_number = int_or_none(video_info.get('season'))
-        episode = video_info.get('episodeTitle')
-        episode_number = int_or_none(video_info.get('episodeNumber'))
-
-        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
-        age_limit = None
-        adult = dict_get(
-            video_info, ('inappropriateForChildren', 'blockedForChildren'),
-            skip_false_values=False)
-        if adult is not None:
-            age_limit = 18 if adult else 0
-
-        return {
+        return self._extract_metadata({
             'id': video_id,
-            'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'duration': duration,
-            'age_limit': age_limit,
-            'series': series,
-            'season_number': season_number,
-            'episode': episode,
-            'episode_number': episode_number,
             'is_live': is_live,
-        }
+        }, video_info)
 
 
 class SVTIE(SVTBaseIE):
@@ -156,6 +172,23 @@ class SVTPlayIE(SVTPlayBaseIE):
                 }]
             },
         },
+    }, {
+        'url': 'https://www.svtplay.se/video/21980718/kara-dagbok/kara-dagbok-sasong-1-avsnitt-8-1',
+        'md5': '45a7ca276a15bce3bb58a15f83f5e2cc',
+        'info_dict': {
+            'id': 'KyVERRZ',
+            'ext': 'mp4',
+            'title': 'Avsnitt 8',
+            'description': 'md5:512721ebad776bc05901effc0e2ac34e',
+            'timestamp': 1556064000,
+            'upload_date': '20190424',
+            'duration': 820,
+            'age_limit': 0,
+            'series': 'Kära dagbok',
+            'season_number': 1,
+            'episode': 'Avsnitt 8',
+            'episode_number': 8,
+        },
     }, {
         # geo restricted to Sweden
         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@@ -209,7 +242,7 @@ class SVTPlayIE(SVTPlayBaseIE):
                 group='json'),
             video_id, fatal=False)
 
-        thumbnail = self._og_search_thumbnail(webpage)
+        info_dict = {}
 
         if data:
             video_info = try_get(
@@ -217,18 +250,25 @@ class SVTPlayIE(SVTPlayBaseIE):
                 dict)
             if video_info:
                 info_dict = self._extract_video(video_info, video_id)
-                info_dict.update({
-                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
-                    'thumbnail': thumbnail,
-                })
+                info_dict['title'] = data['context']['dispatcher']['stores']['MetaStore']['title']
                 self._adjust_title(info_dict)
-                return info_dict
 
-        svt_id = self._search_regex(
-            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
-            webpage, 'video id')
+        if not info_dict:
+            svt_id = self._search_regex(
+                r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+                webpage, 'video id')
 
-        return self._extract_by_video_id(svt_id, webpage)
+            info_dict = self._extract_by_video_id(svt_id, webpage)
+
+        if data:
+            video_data = try_get(data, lambda x: x['videoPage']['video'], dict)
+            if video_data:
+                info_dict = self._extract_metadata(info_dict, video_data)
+
+        if not info_dict.get('thumbnail'):
+            info_dict['thumbnail'] = self._og_search_thumbnail(webpage)
+
+        return info_dict
 
 
 class SVTSeriesIE(SVTPlayBaseIE):