From d6f100b9603299fa4c9d802831b1282a96cec069 Mon Sep 17 00:00:00 2001
From: Jakob Klepp
Date: Fri, 3 Apr 2020 20:19:32 +0200
Subject: [PATCH 1/2] [zdf] extract episode info if available

Add ZDFBaseIE._extract_episode_info(), which reads season_number and
episode_number from "Staffel N" / "Folge N" in the webpage, and episode
and series from the page's JSON-LD. ZDFIE merges these fields into the
info dict returned by _extract_regular().
---
 youtube_dl/extractor/zdf.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 656864b2e..3224a4b0f 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
@@ -17,7 +18,8 @@ from ..utils import (
     update_url_query,
     url_or_none,
     urljoin,
+    JSON_LD_RE,
 )
 
 
 class ZDFBaseIE(InfoExtractor):
@@ -37,6 +39,19 @@ class ZDFBaseIE(InfoExtractor):
                 group='json'),
             video_id)
 
+    def _extract_episode_info(self, webpage):
+        season_number = self._search_regex(r"Staffel\ ([0-9]+)", webpage, "season", fatal=False)
+        episode_number = self._search_regex(r"Folge\ ([0-9]+)", webpage, "episode", fatal=False)
+        json_ld = json.loads(self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld', fatal=False))
+        episode = json_ld.get("name")
+        series = json_ld.get("publisher", {}).get("name")
+        return dict(
+            season_number=int_or_none(season_number),
+            episode_number=int_or_none(episode_number),
+            episode=episode.strip(),
+            series=series.strip()
+        )
+
 
 class ZDFIE(ZDFBaseIE):
     _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
@@ -231,7 +246,10 @@ class ZDFIE(ZDFBaseIE):
         if webpage:
             player = self._extract_player(webpage, url, fatal=False)
             if player:
-                return self._extract_regular(url, player, video_id)
+                extracted = self._extract_regular(url, player, video_id)
+                episode_info = self._extract_episode_info(webpage)
+                extracted.update(episode_info)
+                return extracted
 
         return self._extract_mobile(video_id)
 

From 468bf49cf6cda6d5530e35dccb79feb18408f4d4 Mon Sep 17 00:00:00 2001
From: Jakob Klepp
Date: Fri, 3 Apr 2020 21:38:15 +0200
Subject: [PATCH 2/2] [zdf] Add testcase and improve extraction

Treat episode info as optional: fall back to defaults when the
"Staffel"/"Folge" markers or the JSON-LD block are missing, ignore
malformed JSON-LD, and strip the names inside try_get() so that absent
or non-string "name" fields yield None instead of raising.

Add a Bad Banks episode as a test case covering the new fields.
---
 youtube_dl/extractor/zdf.py | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 3224a4b0f..e50d25ca0 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -40,14 +40,21 @@ class ZDFBaseIE(InfoExtractor):
             video_id)
 
     def _extract_episode_info(self, webpage):
-        season_number = self._search_regex(r"Staffel\ ([0-9]+)", webpage, "season", fatal=False)
-        episode_number = self._search_regex(r"Folge\ ([0-9]+)", webpage, "episode", fatal=False)
-        json_ld = json.loads(self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld', fatal=False))
-        episode = json_ld.get("name")
-        series = json_ld.get("publisher", {}).get("name")
+        season_number = self._search_regex(r"Staffel\ ([0-9]+)", webpage, "season", fatal=False, default=None)
+        episode_number = self._search_regex(r"Folge\ ([0-9]+)", webpage, "episode", fatal=False, default=None)
+        # Episode metadata is optional: malformed JSON-LD must not abort extraction.
+        try:
+            json_ld = json.loads(self._search_regex(
+                JSON_LD_RE, webpage, 'JSON-LD', group='json_ld', fatal=False, default='{}'))
+        except ValueError:
+            json_ld = None
+        # Strip inside the getter so try_get() turns a missing key, a non-object
+        # JSON-LD payload or a non-string value into None instead of raising.
+        episode = try_get(json_ld, lambda x: x['name'].strip())
+        series = try_get(json_ld, lambda x: x['publisher']['name'].strip())
         return dict(
             season_number=int_or_none(season_number),
             episode_number=int_or_none(episode_number),
-            episode=episode.strip(),
-            series=series.strip()
+            episode=episode,
+            series=series
         )
@@ -69,6 +76,20 @@ class ZDFIE(ZDFBaseIE):
             'timestamp': 1465021200,
             'upload_date': '20160604',
         },
+    }, {
+        'url': 'https://www.zdf.de/serien/bad-banks/schoene-neue-welt-138.html',
+        'info_dict': {
+            'id': 'schoene-neue-welt-138',
+            'ext': 'flv',
+            'description': 'md5:660826414ae02d93374783958250046d',
+            'title': 'Schöne neue Welt',
+            'timestamp': 1581194700,
+            'upload_date': '20200208',
+            'season_number': 2,
+            'episode_number': 1,
+            'episode': 'Schöne neue Welt',
+            'series': 'Bad Banks',
+        },
     }, {
         'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
         'only_matching': True,