From 22f7414a46d6929e546ded74edc003c67fbc2c2f Mon Sep 17 00:00:00 2001 From: AndersVittrup Date: Sat, 4 Jan 2020 19:27:12 +0100 Subject: [PATCH] Added playlist --- youtube_dl/extractor/discoverynetworks.py | 47 +++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py index 607a54948..505f6d529 100644 --- a/youtube_dl/extractor/discoverynetworks.py +++ b/youtube_dl/extractor/discoverynetworks.py @@ -3,8 +3,13 @@ from __future__ import unicode_literals import re +from .common import InfoExtractor from .dplay import DPlayIE +from ..utils import ( + urljoin +) + class DiscoveryNetworksDeIE(DPlayIE): _VALID_URL = r'https?://(?:www\.)?(?P(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P[^/]+)/video/(?P[^/]+)' @@ -38,3 +43,45 @@ class DiscoveryNetworksDeIE(DPlayIE): return self._get_disco_api_info( url, '%s/%s' % (programme, alternate_id), 'sonic-eu1-prod.disco-api.com', realm, country) + + +class DiscoveryNetworksDePlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P(?:tlc|dmax)\.de|dplay\.co\.uk)/(?Pprogramme|show)/(?P[^/]+)' + + _TESTS = [{ + 'url': 'https://www.dplay.co.uk/show/hairy-bikers-mississippi-adventure', + 'only_matching': True + },{ + 'url': 'https://www.dmax.de/programme/naked-survival', + 'only_matching': True + }] + + @classmethod + def suitable(cls, url): + return False if DiscoveryNetworksDeIE.suitable(url) else super( + DiscoveryNetworksDePlaylistIE, cls).suitable(url) + + def _extract_episodes(self, url, webpage, _type, program): + episodes = [] + for episode in re.finditer(r'"path":"' + program + r'(?P/.+?)"', webpage): + episode_url = urljoin( url, '/' + _type + '/' + program + '/video' + episode.group('episode')) + if episode_url not in episodes: + episodes.append(episode_url) + return episodes + + def _real_extract(self, url): + domain, _type, programme = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, programme) + + title = self._html_search_regex( + r'
(.+?)
', webpage, + 'title', default=None) + + if title: + title = re.sub(r'\s*\|\s*.+?$', '', title) + + episodes = self._extract_episodes(url, webpage, _type, programme) + + entries = [self.url_result(ep, ie=DiscoveryNetworksDeIE.ie_key()) for ep in episodes] + + return self.playlist_result(entries, programme, title)