diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index 390e79f8c..ae5594471 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -21,6 +21,8 @@ from ..utils import (
     unified_timestamp,
     update_url_query,
     url_or_none,
+    urljoin,
+    base_url,
 )
 
 
@@ -296,6 +298,107 @@ class DRTVIE(InfoExtractor):
         }
 
 
+class DRTVPlaylistIE(InfoExtractor):
+    """Extract the episodes of a DRTV series or season page as a playlist.
+
+    A series page is scanned for links to season pages, and every season
+    page is scanned for links to episode pages. The episode URLs are
+    handed over to DRTVIE.
+    """
+
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?dr\.dk/drtv/
+                        (?P<type>serie|saeson)/
+                        (?P<id>[\da-z_-]+)
+                    '''
+    _TEST = {
+        'url': 'https://www.dr.dk/drtv/serie/spise-med-price_43537',
+        'info_dict': {
+            'id': 'spise-med-price_43537',
+            'title': 'Spise med Price',
+        },
+        'playlist_mincount': 2,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        # URLs that DRTVIE accepts are never treated as playlists.
+        if DRTVIE.suitable(url):
+            return False
+        return super(DRTVPlaylistIE, cls).suitable(url)
+
+    @staticmethod
+    def _extract_links(webpage, page_url, section):
+        """Return the absolute /drtv/<section>/ links found in webpage.
+
+        Links keep their page order and duplicates are dropped, so a page
+        that is linked several times is only visited once.
+        """
+        links = []
+        pattern = r'href="(/drtv/%s/[^"]+)"' % section
+        for path in re.findall(pattern, webpage):
+            link = urljoin(base_url(page_url), path)
+            if link and link not in links:
+                links.append(link)
+        return links
+
+    def _extract_series(self, url, webpage=None):
+        """Return the episode URLs of all seasons linked from a series page.
+
+        webpage is the already downloaded content of url; it is fetched
+        here when omitted.
+        """
+        if webpage is None:
+            webpage = self._download_webpage(url, self._match_id(url))
+
+        episodes = []
+        for season_url in self._extract_links(webpage, url, 'saeson'):
+            for episode_url in self._extract_episode_from_season(season_url):
+                # Keep only the first occurrence of each episode.
+                if episode_url not in episodes:
+                    episodes.append(episode_url)
+        return episodes
+
+    def _extract_episode_from_season(self, url, webpage=None):
+        """Return the episode URLs linked from a season page.
+
+        webpage is the already downloaded content of url; it is fetched
+        here when omitted.
+        """
+        if webpage is None:
+            webpage = self._download_webpage(url, self._match_id(url))
+        return self._extract_links(webpage, url, 'se')
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_type, playlist_id = mobj.group('type', 'id')
+        webpage = self._download_webpage(url, playlist_id)
+
+        # NOTE(review): the markup in this pattern was lost in the patch as
+        # received; <title> is assumed because the " | ..." suffix is
+        # stripped below - confirm against the author's branch.
+        title = self._html_search_regex(
+            r'<title[^>]*>(.+?)</title>', webpage,
+            'title', default=None)
+        if title:
+            title = re.sub(r'\s*\|\s*.+?$', '', title)
+
+        # Dispatch on the path component captured by _VALID_URL; a substring
+        # test on the whole URL would misroute a season whose slug contains
+        # "serie".
+        if playlist_type == 'serie':
+            episodes = self._extract_series(url, webpage)
+        else:
+            episodes = self._extract_episode_from_season(url, webpage)
+
+        entries = [
+            self.url_result(episode_url, ie=DRTVIE.ie_key())
+            for episode_url in episodes]
+
+        return self.playlist_result(entries, playlist_id, title)
+
+
 class DRTVLiveIE(InfoExtractor):
     IE_NAME = 'drtv:live'
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 7b05f5410..4b8179315 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -277,6 +277,7 @@ from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
 from .drtv import (
     DRTVIE,
+    DRTVPlaylistIE,
     DRTVLiveIE,
 )
 from .dtube import DTubeIE