From 8ae3d215339a787edfe8fae8bc57d93c29315143 Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Mon, 17 Jun 2019 18:18:53 +0200 Subject: [PATCH 1/7] [tvplay] Extract subtitles for Viafree (closes #12685) --- youtube_dl/extractor/tvplay.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index d82d48f94..6e4556746 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -310,7 +310,6 @@ class TVPlayIE(InfoExtractor): self._sort_formats(formats) - # TODO: webvtt in m3u8 subtitles = {} sami_path = video.get('sami_path') if sami_path: @@ -321,6 +320,24 @@ class TVPlayIE(InfoExtractor): 'url': sami_path, }] + subtitles_webvtt = video.get('subtitles_webvtt') + if subtitles_webvtt: + lang = self._search_regex( + r'_([a-z]{2})\.vtt', subtitles_webvtt, 'lang', + default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) + subtitles[lang] = [{ + 'url': subtitles_webvtt, + }] + + subtitles_for_hearing_impaired = video.get('subtitles_for_hearing_impaired') + if subtitles_for_hearing_impaired: + lang = self._search_regex( + r'_([a-z]{2})_', subtitles_for_hearing_impaired, 'lang', + default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) + subtitles[lang + '_sdh'] = [{ + 'url': subtitles_for_hearing_impaired, + }] + series = video.get('format_title') episode_number = int_or_none(video.get('format_position', {}).get('episode')) season = video.get('_embedded', {}).get('season', {}).get('title') From 4569db53ce6bf3ee9fd10b44a3423a42a56997b8 Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Tue, 18 Jun 2019 09:14:53 +0200 Subject: [PATCH 2/7] Restore todo comment --- youtube_dl/extractor/tvplay.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 6e4556746..bcfce2379 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -310,6 +310,7 @@ class TVPlayIE(InfoExtractor): self._sort_formats(formats) + # TODO: webvtt in m3u8 subtitles = {} sami_path = video.get('sami_path') if sami_path: From 3ee59f9423fe6ebfdfb61cc58fe25deff930dd07 Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Tue, 2 Jul 2019 12:54:27 +0200 Subject: [PATCH 3/7] Loop all subtitles for Viafree --- youtube_dl/extractor/tvplay.py | 39 ++++++++++++---------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index bcfce2379..c3ccda2ab 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -312,32 +312,19 @@ class TVPlayIE(InfoExtractor): # TODO: webvtt in m3u8 subtitles = {} - sami_path = video.get('sami_path') - if sami_path: - lang = self._search_regex( - r'_([a-z]{2})\.xml', sami_path, 'lang', - default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) - subtitles[lang] = [{ - 'url': sami_path, - }] - - subtitles_webvtt = video.get('subtitles_webvtt') - if subtitles_webvtt: - lang = self._search_regex( - r'_([a-z]{2})\.vtt', subtitles_webvtt, 'lang', - default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) - subtitles[lang] = [{ - 'url': subtitles_webvtt, - }] - - subtitles_for_hearing_impaired = video.get('subtitles_for_hearing_impaired') - if subtitles_for_hearing_impaired: - lang = self._search_regex( - r'_([a-z]{2})_', subtitles_for_hearing_impaired, 'lang', - default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) - subtitles[lang + '_sdh'] = [{ - 'url': subtitles_for_hearing_impaired, - }] + sub_paths = [ + video.get('sami_path'), + video.get('subtitles_webvtt'), + video.get('subtitles_for_hearing_impaired'), + ] + for path in sub_paths: + if path: + lang = self._search_regex( + r'_(.*)(\.)', path, 'lang', + default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) + subtitles[lang] = [{ + 'url': path, + }] series = video.get('format_title') episode_number = int_or_none(video.get('format_position', {}).get('episode')) From 2872ea449409331487960ea71c6ba0665cf0b831 Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Thu, 11 Jul 2019 13:15:37 +0200 Subject: [PATCH 4/7] Get sub_paths with a loop --- youtube_dl/extractor/tvplay.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c3ccda2ab..68828aa86 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -312,11 +312,7 @@ class TVPlayIE(InfoExtractor): # TODO: webvtt in m3u8 subtitles = {} - sub_paths = [ - video.get('sami_path'), - video.get('subtitles_webvtt'), - video.get('subtitles_for_hearing_impaired'), - ] + sub_paths = [video.get(key) for key in ['sami_path', 'subtitles_webvtt', 'subtitles_for_hearing_impaired']] for path in sub_paths: if path: lang = self._search_regex( From be7ed839418239ccdafdd0b6537b995c360d748e Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Thu, 11 Jul 2019 13:23:24 +0200 Subject: [PATCH 5/7] Calculate regex default only once --- youtube_dl/extractor/tvplay.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 68828aa86..d3f83c490 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -313,11 +313,10 @@ class TVPlayIE(InfoExtractor): # TODO: webvtt in m3u8 subtitles = {} sub_paths = [video.get(key) for key in ['sami_path', 'subtitles_webvtt', 'subtitles_for_hearing_impaired']] + default = compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1] for path in sub_paths: if path: - lang = self._search_regex( - r'_(.*)(\.)', path, 'lang', - default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) + lang = self._search_regex(r'_(.*)(\.)', path, 'lang', default) subtitles[lang] = [{ 'url': path, }] From 62535b93cf6d85edcc96953ed7b53c3d9f7eb79f Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Tue, 4 Feb 2020 19:55:20 +0100 Subject: [PATCH 6/7] Add better regex for subtitles extraction --- youtube_dl/extractor/tvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index d3f83c490..dfac70999 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -316,7 +316,7 @@ class TVPlayIE(InfoExtractor): default = compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1] for path in sub_paths: if path: - lang = self._search_regex(r'_(.*)(\.)', path, 'lang', default) + lang = self._search_regex(r'_([a-z]{2}(?:_sdh)?)\.(?:vtt|xml)', path, 'lang', default) subtitles[lang] = [{ 'url': path, }] From 76862a39dabb10a8efa77f4d474baa125c32511d Mon Sep 17 00:00:00 2001 From: David Pedersen Date: Tue, 4 Feb 2020 20:35:19 +0100 Subject: [PATCH 7/7] Add subtitles test for Viafree --- youtube_dl/extractor/tvplay.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index dfac70999..add4e6790 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -391,6 +391,33 @@ class ViafreeIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TVPlayIE.ie_key()], + }, { + # with subtitles + 'url': 'https://www.viafree.se/program/livsstil/varldens-basta-burgare/sasong-2/avsnitt-1', + 'info_dict': { + 'id': '828877', + 'ext': 'mp4', + 'title': 'Världens bästa burgare S02E01', + 'description': '', + 'series': 'Världens bästa burgare', + 'season': 'Säsong 2', + 'season_number': 2, + 'duration': 2580, + 'timestamp': 1491233172, + 'upload_date': '20170403', + 'subtitles': { + 'sv': [{ + 'ext': 'vtt', + }], + 'sv_sdh': [{ + 'ext': 'vtt', + }] + }, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TVPlayIE.ie_key()], }, { # Different og:image URL schema 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',