From a361b9f789197d40e75208e02f92a936d67df240 Mon Sep 17 00:00:00 2001 From: Mart Date: Sat, 25 Aug 2018 13:32:23 +0200 Subject: [PATCH 1/5] [Sporza] Add extractor for new url format --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/vrt.py | 65 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c7a91a986..6e43ebe1a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1317,7 +1317,10 @@ from .voxmedia import ( VoxMediaIE, ) from .vporn import VpornIE -from .vrt import VRTIE +from .vrt import ( + VRTIE, + SporzaIE, +) from .vrak import VrakIE from .vrv import ( VRVIE, diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 444295d68..d8810613a 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -152,3 +152,68 @@ class VRTIE(InfoExtractor): 'duration': duration, 'formats': formats, } + + +class SporzaIE(InfoExtractor): + _VALID_URL = r'https?://sporza\.be/nl/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/*' + _TESTS = [{ + 'url': 'https://sporza.be/nl/2018/08/20/israel-is-geen-partij-voor-de-yellow-tigers/', + 'md5': 'b13b66a4b95daccf2ada6b3ca94109c6', + 'info_dict': { + 'id': 'vid-f3d9b1c6-5c8b-414c-a2ba-9c895e50c890', + 'ext': 'mp4', + 'title': 'Israël is geen partij voor de Yellow Tigers', + 'description': 'Israël is geen partij voor de Yellow Tigers', + 'thumbnail': 'https://images.vrt.be/orig/2018/08/20/152c3089-a470-11e8-abcc-02b7b76bf47f.jpg', + }, + }, + { + 'url': 'https://sporza.be/nl/2018/07/29/de-tour-van-thomas/', + 'md5': '267213350047577b614ee9804dd5b0c8', + 'info_dict': { + 'id': 'vid-155c6577-addc-48d3-b86f-1d66f19d6bcc', + 'ext': 'mp4', + 'title': 'De Tour van Thomas', + 'description': 'De Tour van Thomas', + 'thumbnail': 'https://images.vrt.be/orig/2018/07/29/b9ad0d38-9376-11e8-abcc-02b7b76bf47f.jpg', + }, + } + ] + + def 
_real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + r'data-videoid=\"([^\"]+)\"', webpage, 'video id', fatal=True) + publication_id = self._search_regex( + r'data-publicationid=\"([^\"]+)\"', webpage, 'publication id', fatal=True) + api_url = self._search_regex( + r'data-mediaapiurl=\"([^\"]+)\"', webpage, 'api url', + default="https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1") + video_client = self._search_regex( + r'data-client=\"([^\"]+)\"', webpage, 'video client', default="sporza") + + # Get a player token + vrtPlayerToken = self._download_json( + "https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1/tokens", + video_id, headers={"content-type": ""}, data={}).get( + "vrtPlayerToken") # Default content type results in 415 + + src = api_url + "/videos/" + publication_id + "$" + video_id + "/?vrtPlayerToken=" + vrtPlayerToken + "&client=" + video_client + meta = self._download_json(src, video_id) + + formats = self._extract_m3u8_formats(meta["targetUrls"][0]["url"], video_id) + # Set the extention as the m3u8 extractor doesn't do this. 
+ # VLC doesn't play nice with .m3u8 files from sporza.be + for i in formats: + i['ext'] = "mp4" + + return { + 'id': video_id, + 'title': meta.get('title') or self._og_search_title(webpage), + 'description': meta.get('shortDescription'), + 'thumbnail': meta.get('posterImageUrl'), + 'duration': meta.get('duration'), + 'formats': formats, + } From d71b9c3c56834467c8fe989dae3e286580801ab9 Mon Sep 17 00:00:00 2001 From: Mart Date: Sat, 25 Aug 2018 18:33:59 +0200 Subject: [PATCH 2/5] [Sporza] Changes based on feedback on https://github.com/rg3/youtube-dl/pull/17327/files/9650f240d5d5b31b10754cf59d8d1d9e70383a15 --- youtube_dl/extractor/vrt.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index d8810613a..79fdf4d69 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -155,7 +155,7 @@ class VRTIE(InfoExtractor): class SporzaIE(InfoExtractor): - _VALID_URL = r'https?://sporza\.be/nl/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/*' + _VALID_URL = r'https?://sporza\.be/nl/(?:[^/]+/)+(?P<id>[^/]+)/*' _TESTS = [{ 'url': 'https://sporza.be/nl/2018/08/20/israel-is-geen-partij-voor-de-yellow-tigers/', 'md5': 'b13b66a4b95daccf2ada6b3ca94109c6', @@ -164,7 +164,7 @@ class SporzaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Israël is geen partij voor de Yellow Tigers', 'description': 'Israël is geen partij voor de Yellow Tigers', - 'thumbnail': 'https://images.vrt.be/orig/2018/08/20/152c3089-a470-11e8-abcc-02b7b76bf47f.jpg', + 'thumbnail': r're:^https?://images.vrt.be/.*\.jpg$', }, }, { @@ -175,7 +175,7 @@ class SporzaIE(InfoExtractor): 'ext': 'mp4', 'title': 'De Tour van Thomas', 'description': 'De Tour van Thomas', - 'thumbnail': 'https://images.vrt.be/orig/2018/07/29/b9ad0d38-9376-11e8-abcc-02b7b76bf47f.jpg', + 'thumbnail': r're:^https?://images.vrt.be/.*\.jpg$', }, } ] @@ -203,11 +203,7 @@ class SporzaIE(InfoExtractor): src = api_url + "/videos/" + publication_id + "$" + 
video_id + "/?vrtPlayerToken=" + vrtPlayerToken + "&client=" + video_client meta = self._download_json(src, video_id) - formats = self._extract_m3u8_formats(meta["targetUrls"][0]["url"], video_id) - # Set the extention as the m3u8 extractor doesn't do this. - # VLC doesn't play nice with .m3u8 files from sporza.be - for i in formats: - i['ext'] = "mp4" + formats = self._extract_m3u8_formats(meta["targetUrls"][0]["url"], video_id, "mp4") return { 'id': video_id, From f743511503b1d9976a83a721ca5f8c64c0e24bd6 Mon Sep 17 00:00:00 2001 From: Mart Date: Sat, 25 Aug 2018 18:39:08 +0200 Subject: [PATCH 3/5] [Sporza] More robust targetUrl selection --- youtube_dl/extractor/vrt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 79fdf4d69..0e08660da 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -203,7 +203,8 @@ class SporzaIE(InfoExtractor): src = api_url + "/videos/" + publication_id + "$" + video_id + "/?vrtPlayerToken=" + vrtPlayerToken + "&client=" + video_client meta = self._download_json(src, video_id) - formats = self._extract_m3u8_formats(meta["targetUrls"][0]["url"], video_id, "mp4") + targetUrl = next((x for x in meta["targetUrls"] if x["type"]=="hls")).get("url") + formats = self._extract_m3u8_formats(targetUrl, video_id, "mp4") return { 'id': video_id, From b12bfd5bb285e4d812eebb22c0db52f4c8c742a6 Mon Sep 17 00:00:00 2001 From: Maocx Date: Tue, 28 Aug 2018 17:27:48 +0200 Subject: [PATCH 4/5] Sending correct content-type Added content-type from https://github.com/pietje666/plugin.video.vrt.nu/issues/21. 
--- youtube_dl/extractor/vrt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 0e08660da..829617286 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -197,8 +197,8 @@ class SporzaIE(InfoExtractor): # Get a player token vrtPlayerToken = self._download_json( "https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1/tokens", - video_id, headers={"content-type": ""}, data={}).get( - "vrtPlayerToken") # Default content type results in 415 + video_id, headers={"content-type": "application/json"}, data={}).get( + "vrtPlayerToken") # Default content type results in 415, correct content-type from: https://github.com/pietje666/plugin.video.vrt.nu/issues/21 src = api_url + "/videos/" + publication_id + "$" + video_id + "/?vrtPlayerToken=" + vrtPlayerToken + "&client=" + video_client meta = self._download_json(src, video_id) From a045fe9c598a0d151e70a25463fcd6595133d62f Mon Sep 17 00:00:00 2001 From: Maocx Date: Fri, 31 Aug 2018 17:31:35 +0200 Subject: [PATCH 5/5] Made api endpoint dynamic Made api-endpoint more robust to change. --- youtube_dl/extractor/vrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 829617286..2303b5195 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -196,7 +196,7 @@ class SporzaIE(InfoExtractor): # Get a player token vrtPlayerToken = self._download_json( - "https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1/tokens", + api_url + "/tokens", video_id, headers={"content-type": "application/json"}, data={}).get( "vrtPlayerToken") # Default content type results in 415, correct content-type from: https://github.com/pietje666/plugin.video.vrt.nu/issues/21