From ad7f5cb12eb7796b010f5abad37b8f9656cccf87 Mon Sep 17 00:00:00 2001
From: 0x9fff00 <0x9fff00+git@protonmail.ch>
Date: Sun, 10 Feb 2019 00:06:15 +0100
Subject: [PATCH] [svtplay] Extract more metadata and higher resolution
 thumbnail (closes #18159)

---
 youtube_dl/extractor/svt.py | 108 ++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 34 deletions(-)

diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index e12389cad..86029e128 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -9,15 +9,53 @@ from ..utils import (
     determine_ext,
     dict_get,
     int_or_none,
+    merge_dicts,
     str_or_none,
     strip_or_none,
     try_get,
+    unified_timestamp,
 )
 
 
 class SVTBaseIE(InfoExtractor):
     _GEO_COUNTRIES = ['SE']
 
+    def _extract_metadata(self, info, video_info):
+        title = video_info.get('title')
+
+        series = video_info.get('programTitle')
+        season_number = int_or_none(video_info.get('season'))
+        episode = video_info.get('episodeTitle')
+        episode_number = int_or_none(video_info.get('episodeNumber'))
+
+        thumbnail = video_info.get('thumbnail')
+        if thumbnail:
+            thumbnail = thumbnail.replace('{format}', 'extralarge')
+        description = video_info.get('description')
+        timestamp = unified_timestamp(try_get(video_info, (
+            lambda x: x['validFrom'], lambda x: x['rights']['validFrom'])))
+        duration = int_or_none(
+            dict_get(video_info, ('materialLength', 'contentDuration')))
+        age_limit = None
+        adult = dict_get(
+            video_info, ('inappropriateForChildren', 'blockedForChildren'),
+            skip_false_values=False)
+        if adult is not None:
+            age_limit = 18 if adult else 0
+
+        return merge_dicts(info, {
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'timestamp': timestamp,
+            'duration': duration,
+            'age_limit': age_limit,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+        })
+
     def _extract_video(self, video_info, video_id):
         is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
         m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
@@ -63,34 +101,12 @@ class SVTBaseIE(InfoExtractor):
 
                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
 
-        title = video_info.get('title')
-
-        series = video_info.get('programTitle')
-        season_number = int_or_none(video_info.get('season'))
-        episode = video_info.get('episodeTitle')
-        episode_number = int_or_none(video_info.get('episodeNumber'))
-
-        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
-        age_limit = None
-        adult = dict_get(
-            video_info, ('inappropriateForChildren', 'blockedForChildren'),
-            skip_false_values=False)
-        if adult is not None:
-            age_limit = 18 if adult else 0
-
-        return {
+        return self._extract_metadata({
             'id': video_id,
-            'title': title,
             'formats': formats,
             'subtitles': subtitles,
-            'duration': duration,
-            'age_limit': age_limit,
-            'series': series,
-            'season_number': season_number,
-            'episode': episode,
-            'episode_number': episode_number,
             'is_live': is_live,
-        }
+        }, video_info)
 
 
 class SVTIE(SVTBaseIE):
@@ -156,6 +172,23 @@ class SVTPlayIE(SVTPlayBaseIE):
                 }]
             },
         },
+    }, {
+        'url': 'https://www.svtplay.se/video/21980718/kara-dagbok/kara-dagbok-sasong-1-avsnitt-8-1',
+        'md5': '45a7ca276a15bce3bb58a15f83f5e2cc',
+        'info_dict': {
+            'id': 'KyVERRZ',
+            'ext': 'mp4',
+            'title': 'Avsnitt 8',
+            'description': 'md5:512721ebad776bc05901effc0e2ac34e',
+            'timestamp': 1556064000,
+            'upload_date': '20190424',
+            'duration': 820,
+            'age_limit': 0,
+            'series': 'Kära dagbok',
+            'season_number': 1,
+            'episode': 'Avsnitt 8',
+            'episode_number': 8,
+        },
     }, {
         # geo restricted to Sweden
         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@@ -209,7 +242,7 @@ class SVTPlayIE(SVTPlayBaseIE):
                 group='json'),
             video_id, fatal=False)
 
-        thumbnail = self._og_search_thumbnail(webpage)
+        info_dict = {}
 
         if data:
             video_info = try_get(
@@ -217,18 +250,25 @@ class SVTPlayIE(SVTPlayBaseIE):
                 dict)
             if video_info:
                 info_dict = self._extract_video(video_info, video_id)
-                info_dict.update({
-                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
-                    'thumbnail': thumbnail,
-                })
+                info_dict['title'] = data['context']['dispatcher']['stores']['MetaStore']['title']
                 self._adjust_title(info_dict)
-                return info_dict
 
-        svt_id = self._search_regex(
-            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
-            webpage, 'video id')
+        if not info_dict:
+            svt_id = self._search_regex(
+                r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+                webpage, 'video id')
 
-        return self._extract_by_video_id(svt_id, webpage)
+            info_dict = self._extract_by_video_id(svt_id, webpage)
+
+        if data:
+            video_data = try_get(data, lambda x: x['videoPage']['video'], dict)
+            if video_data:
+                info_dict = self._extract_metadata(info_dict, video_data)
+
+        if not info_dict.get('thumbnail'):
+            info_dict['thumbnail'] = self._og_search_thumbnail(webpage)
+
+        return info_dict
 
 
 class SVTSeriesIE(SVTPlayBaseIE):