From 0f6bfb1325eb7564fdfec2feaea74b43d11fd6d7 Mon Sep 17 00:00:00 2001 From: TinyToweringTree <54483833+TinyToweringTree@users.noreply.github.com> Date: Sat, 17 Aug 2019 22:50:51 +0200 Subject: [PATCH] [ard] Extract episode information --- youtube_dl/extractor/ard.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 7e028afd7..021d552b6 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -471,6 +471,34 @@ class ARDBetaMediathekIE(InfoExtractor): return result + def _extract_episode_info(self, title): + patterns = [ + r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*', + r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*', + r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*', + r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) ).*', + ] + res = {} + + for pattern in patterns: + m = re.match(pattern, title) + if m: + groupdict = m.groupdict() + for int_entry in ['season_number', 'episode_number']: + res[int_entry] = int_or_none(groupdict.get(int_entry)) + + for str_entry in ['episode']: + res[str_entry] = str_or_none(groupdict.get(str_entry)) + + if groupdict.get('ep_info') and not res['episode']: + res['episode'] = str_or_none(title.replace(groupdict.get('ep_info'), '')) + + if res['episode']: + res['episode'] = res['episode'].strip() + + break + + return res def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -480,6 +508,8 @@ class ARDBetaMediathekIE(InfoExtractor): webpage = self._download_webpage(url, display_id) data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json') data = self._parse_json(data_json, display_id) + #import json + #print(json.dumps(data, indent=2)) res = { 'id': video_id, @@ -559,8 +589,8 @@ class ARDBetaMediathekIE(InfoExtractor): if not formats and blocked_by_fsk: raise ExtractorError( - msg = 'This video is currently not available due to age restrictions (FSK %d). 
Try again from %02d:00 to 06:00.' % (res['age_limit'], 22 if res['age_limit'] < 18 else 23), - expected = True) + msg='This video is currently not available due to age restrictions (FSK %d). Try again from %02d:00 to 06:00.' % (res['age_limit'], 22 if res['age_limit'] < 18 else 23), + expected=True) self._sort_formats(formats) res.update({ @@ -568,4 +598,6 @@ class ARDBetaMediathekIE(InfoExtractor): 'formats': formats, }) + res.update(self._extract_episode_info(res.get('title'))) + return res