From 88199bf54c0941f5dec50fe08c9d3297bbe62c19 Mon Sep 17 00:00:00 2001
From: AndersVittrup
Date: Sat, 4 Jan 2020 19:26:48 +0100
Subject: [PATCH] Added playlist both from series and season

---
 youtube_dl/extractor/drtv.py | 95 ++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index 390e79f8c..786150e95 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -21,6 +21,8 @@ from ..utils import (
     unified_timestamp,
     update_url_query,
     url_or_none,
+    base_url,
+    urljoin,
 )
 
 
@@ -295,6 +297,99 @@ class DRTVIE(InfoExtractor):
             'release_year': int_or_none(data.get('ProductionYear')),
         }
 
+
+class DRTVPlaylistIE(InfoExtractor):
+    # Builds a playlist of DRTVIE episode URLs from a series ("serie") page
+    # or a single season ("saeson") page.
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?dr\.dk/drtv/
+                        (?P<kind>serie|saeson)/
+                        (?P<id>[\da-z_-]+)
+                    '''
+    _TEST = {
+        'url': 'https://www.dr.dk/drtv/serie/spise-med-price_43537',
+        'info_dict': {
+            # _match_id() returns the whole slug, numeric suffix included
+            'id': 'spise-med-price_43537',
+            'title': 'Spise med Price',
+        },
+        'playlist_mincount': 6,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        # Defer to the single-video extractor whenever it claims the URL
+        return False if DRTVIE.suitable(url) else super(
+            DRTVPlaylistIE, cls).suitable(url)
+
+    @staticmethod
+    def _extract_links(pattern, webpage, url):
+        """Return the unique hrefs matching pattern, absolute, in page order.
+
+        pattern must define a named group 'url'; relative hrefs are
+        resolved against url.
+        """
+        links = []
+        for mobj in re.finditer(pattern, webpage):
+            link = urljoin(base_url(url), mobj.group('url'))
+            # The same target is commonly linked more than once per page
+            if link and link not in links:
+                links.append(link)
+        return links
+
+    def _extract_series(self, url, webpage=None):
+        """Return the episode URLs of every season linked from a series page.
+
+        webpage may be passed in to avoid downloading url a second time.
+        """
+        if webpage is None:
+            webpage = self._download_webpage(url, self._match_id(url))
+
+        episodes = []
+        for season_url in self._extract_links(
+                r'href="(?P<url>/drtv/saeson/.+?)"', webpage, url):
+            for episode_url in self._extract_episode_from_season(season_url):
+                if episode_url not in episodes:
+                    episodes.append(episode_url)
+        return episodes
+
+    def _extract_episode_from_season(self, url, webpage=None):
+        """Return the episode URLs linked from a season page.
+
+        webpage may be passed in to avoid downloading url a second time.
+        """
+        if webpage is None:
+            webpage = self._download_webpage(url, self._match_id(url))
+        return self._extract_links(
+            r'href="(?P<url>/drtv/se/.+?)"', webpage, url)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        kind, playlist_id = mobj.group('kind', 'id')
+        webpage = self._download_webpage(url, playlist_id)
+
+        # NOTE(review): the markup inside this pattern was lost in the copy
+        # of the patch under review; a generic <h1> matcher is assumed here.
+        # Confirm against a live series page.
+        title = self._html_search_regex(
+            r'<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
+        if title:
+            # Cut the title at the first '|' separator
+            title = re.sub(r'\s*\|\s*.+?$', '', title)
+
+        # Dispatch on the URL's path segment rather than a substring test,
+        # which misroutes season URLs whose slug contains "serie"
+        if kind == 'serie':
+            episodes = self._extract_series(url, webpage)
+        else:
+            episodes = self._extract_episode_from_season(url, webpage)
+
+        entries = [
+            self.url_result(episode, ie=DRTVIE.ie_key())
+            for episode in episodes]
+        return self.playlist_result(entries, playlist_id, title)
+
 
 class DRTVLiveIE(InfoExtractor):
     IE_NAME = 'drtv:live'