From d813976ba714ab62c7b90174bb696fc9c3219e7e Mon Sep 17 00:00:00 2001 From: Jens Rutschmann Date: Sat, 1 Sep 2018 19:43:34 +0200 Subject: [PATCH 1/3] Added info extractor for www.tele5.de, a German TV station. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tele5.py | 58 ++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/tele5.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 995af9988..7dc569724 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1086,6 +1086,7 @@ from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tele5 import Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py new file mode 100644 index 000000000..7b0292d68 --- /dev/null +++ b/youtube_dl/extractor/tele5.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .nexx import NexxIE + + +class Tele5IE(InfoExtractor): + _VALID_URL = r'https://www.tele5.de/(mediathek/filme-online/videos|tv/).*' + + _TESTS = [{ + 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', + 'info_dict': { + 'id': '1550589', + 'ext': 'mp4', + 'upload_date': '20180822', + 'timestamp': 1534927316, + 'title': 'SchleFaZ: Atomic Shark' + } + }, { + 'url': 'https://www.tele5.de/tv/dark-matter/videos', + 'info_dict': { + 'id': '1548206', + 'ext': 'mp4', + 'title': 'Folge Sechsundzwanzig', + 'timestamp': 1533664358, + 'upload_date': '20180807' + } + }, { + 'url': 'https://www.tele5.de/tv/relic-hunter/videos', + 'info_dict': { + 'id': '1548034', + 'ext': 'mp4', + 'timestamp': 1533577964, + 'upload_date': '20180806', + 'title': 'Mr. 
Right' + } + }, { + 'url': 'https://www.tele5.de/tv/buffy-im-bann-der-daemonen/videos', + 'info_dict': { + 'id': '1547129', + 'ext': 'mp4', + 'upload_date': '20180730', + 'timestamp': 1532967491, + 'title': 'Der Höllenhund' + } + }] + + def _real_extract(self, url): + webpage = self._download_webpage(url, 'N/A') + + id = self._html_search_regex( + r'class="ce_videoelementnexx-video__player"\sid="video-player"\sdata-id="(?P<id>[0-9]+)"', + webpage, 'id') + + return self.url_result( + 'https://api.nexx.cloud/v3/759/videos/byid/%s' + % id, ie=NexxIE.ie_key()) From 1c14824282f206d4a1bd081c86589893b1dc07bc Mon Sep 17 00:00:00 2001 From: Jens Rutschmann Date: Sun, 2 Sep 2018 13:40:15 +0200 Subject: [PATCH 2/3] Implemented requested changes. - Removed unused capture group from URL regex. - Removed tests for additional series with similar names. - Relaxed regex for parsing the video ID from webpage HTML. - Use video_id instead of id to prevent shadowing of built-in names. --- youtube_dl/extractor/tele5.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index 7b0292d68..f9990d23e 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -6,7 +6,7 @@ from .nexx import NexxIE class Tele5IE(InfoExtractor): - _VALID_URL = r'https://www.tele5.de/(mediathek/filme-online/videos|tv/).*' + _VALID_URL = r'https://www.tele5.de/[mediathek/filme-online/videos|tv/]' _TESTS = [{ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', @@ -26,33 +26,15 @@ class Tele5IE(InfoExtractor): 'timestamp': 1533664358, 'upload_date': '20180807' } - }, { - 'url': 'https://www.tele5.de/tv/relic-hunter/videos', - 'info_dict': { - 'id': '1548034', - 'ext': 'mp4', - 'timestamp': 1533577964, - 'upload_date': '20180806', - 'title': 'Mr. 
Right' - } - }, { - 'url': 'https://www.tele5.de/tv/buffy-im-bann-der-daemonen/videos', - 'info_dict': { - 'id': '1547129', - 'ext': 'mp4', - 'upload_date': '20180730', - 'timestamp': 1532967491, - 'title': 'Der Höllenhund' - } }] def _real_extract(self, url): webpage = self._download_webpage(url, 'N/A') - id = self._html_search_regex( - r'class="ce_videoelementnexx-video__player"\sid="video-player"\sdata-id="(?P<id>[0-9]+)"', + video_id = self._html_search_regex( + r'id="video-player"\sdata-id="(?P<id>[0-9]+)"', webpage, 'id') return self.url_result( 'https://api.nexx.cloud/v3/759/videos/byid/%s' - % id, ie=NexxIE.ie_key()) + % video_id, ie=NexxIE.ie_key()) From dab29d0fce69f466f810db22cbf04e381f0ecb12 Mon Sep 17 00:00:00 2001 From: Jens Rutschmann Date: Mon, 3 Sep 2018 00:34:54 +0200 Subject: [PATCH 3/3] Implemented requested changes. --- youtube_dl/extractor/tele5.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index f9990d23e..cfa8d2475 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from .nexx import NexxIE class Tele5IE(InfoExtractor): - _VALID_URL = r'https://www.tele5.de/[mediathek/filme-online/videos|tv/]' + _VALID_URL = r'https://www\.tele5\.de/(?:mediathek/filme-online/videos\?vid=|tv/)(?P<display_id>[\w-]+)' _TESTS = [{ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1550589', @@ -15,26 +17,23 @@ class Tele5IE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20180822', 'timestamp': 1534927316, - 'title': 'SchleFaZ: Atomic Shark' + 'title': 'SchleFaZ: Atomic Shark', } }, { 'url': 'https://www.tele5.de/tv/dark-matter/videos', - 'info_dict': { - 'id': '1548206', - 'ext': 'mp4', - 'title': 'Folge Sechsundzwanzig', - 'timestamp': 1533664358, - 'upload_date': '20180807' - } + 'only_matching': True, }] def 
_real_extract(self, url): - webpage = self._download_webpage(url, 'N/A') + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( - r'id="video-player"\sdata-id="(?P<id>[0-9]+)"', - webpage, 'id') + r'id\s*=\s*["\']video-player["\']\s*data-id\s*=\s*["\']([0-9]+)["\']', + webpage, 'video_id') return self.url_result( - 'https://api.nexx.cloud/v3/759/videos/byid/%s' - % video_id, ie=NexxIE.ie_key()) + 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, + ie=NexxIE.ie_key(), video_id=video_id)