From c73c24420384912de1eca4d1a9a63cd7f27daa9f Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Sat, 16 Sep 2017 13:50:49 +0200 Subject: [PATCH 1/3] Added extractor for pietsmiet.de --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pietsmiet.py | 79 ++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/pietsmiet.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ecb33bc9e..b1ed1a088 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -793,6 +793,7 @@ from .periscope import ( from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .pietsmiet import PietsmietIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py new file mode 100644 index 000000000..f39bef0d5 --- /dev/null +++ b/youtube_dl/extractor/pietsmiet.py @@ -0,0 +1,79 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .once import OnceIE +from ..compat import ( + compat_urllib_parse_unquote, +) +from ..utils import ( + unescapeHTML, + js_to_json, + int_or_none, +) + + +class PietsmietIE(OnceIE): + _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/categories/[\w-]+/(?P<id>\d+)-.*/?' + _TEST = { + 'url': 'http://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912', + 'info_dict': { + 'id': '29844', + 'ext': 'mp4', + 'title': 'Was würdet ihr die Maus fragen? 
🎮 Frag PietSmiet #912', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + data_video_config = self._search_regex( + r'var config=(.*?);var', webpage, 'video config') + data_video = self._parse_json(js_to_json(unescapeHTML(data_video_config)), page_id) + + formats = [] + + m3u8_manifest_url = data_video['sources'][0]['file'] + m3u8_formats = self._extract_m3u8_formats( + m3u8_manifest_url, page_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + + # Give reproducible names for HLS formats instead of hls- + for f in m3u8_formats: + f['format_id'] = 'hls-{}p'.format(f['height']) + + formats.extend(m3u8_formats) + + if len(data_video['sources']) > 1: + http_video = data_video['sources'][1] + + # Calculate resolution for HTTP format but should always be 1280x720 + format_height_raw = self._search_regex( + '([0-9]+)p', http_video['label'], 'http video height', + default=720, fatal=False) + format_height = int_or_none(format_height_raw) + + if format_height: + format_width = float(format_height) * (16 / 9) + + formats.append({ + 'url': "https:{}".format(http_video['file']), + 'ext': http_video['type'], + 'format_id': 'http-{}'.format(http_video['label']), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) + + self._sort_formats(formats) + + return { + 'id': page_id, + 'display_id': page_id, + 'title': compat_urllib_parse_unquote(data_video['abouttext']), + 'formats': formats, + 'thumbnail': 'http://www.pietsmiet.de/{}'.format(data_video.get('image')), + } From 1ff963a23012a21f5197a212465052f60bba70d8 Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Sun, 17 Sep 2017 11:56:46 +0200 Subject: [PATCH 2/3] Fixes according to suggestions --- youtube_dl/extractor/pietsmiet.py | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/pietsmiet.py 
b/youtube_dl/extractor/pietsmiet.py index f39bef0d5..99a3c32fc 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -41,30 +41,35 @@ class PietsmietIE(OnceIE): m3u8_manifest_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls') - # Give reproducible names for HLS formats instead of hls- - for f in m3u8_formats: - f['format_id'] = 'hls-{}p'.format(f['height']) - formats.extend(m3u8_formats) if len(data_video['sources']) > 1: http_video = data_video['sources'][1] - # Calculate resolution for HTTP format but should always be 1280x720 - format_height_raw = self._search_regex( - '([0-9]+)p', http_video['label'], 'http video height', - default=720, fatal=False) - format_height = int_or_none(format_height_raw) + label = http_video.get('label') - if format_height: - format_width = float(format_height) * (16 / 9) + if label: + # Calculate resolution for HTTP format but should always be 1280x720 + format_height_raw = self._search_regex( + '([0-9]+)p', label, 'http video height', + default=720, fatal=False) + format_height = int_or_none(format_height_raw) + if format_height: + format_width = float(format_height) * (16 / 9) + + formats.append({ + 'url': "https:{0}".format(http_video['file']), + 'ext': http_video.get('type'), + 'format_id': 'http-{0}'.format(label), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) + else: formats.append({ - 'url': "https:{}".format(http_video['file']), - 'ext': http_video['type'], - 'format_id': 'http-{}'.format(http_video['label']), - 'width': int_or_none(format_width), - 'height': format_height, + 'url': "https:{0}".format(http_video['file']), + 'ext': http_video.get('type'), 'fps': 30.0, }) @@ -75,5 +80,5 @@ class PietsmietIE(OnceIE): 'display_id': page_id, 'title': compat_urllib_parse_unquote(data_video['abouttext']), 'formats': formats, - 'thumbnail': 'http://www.pietsmiet.de/{}'.format(data_video.get('image')), + 'thumbnail': 
'http://www.pietsmiet.de/{0}'.format(data_video.get('image')), } From 51285bd7c7fa1e8189f3d640e81927ee6df9ded8 Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Fri, 13 Oct 2017 19:44:27 +0200 Subject: [PATCH 3/3] Allow formats with labels that don't match regex ([0-9]+)p --- youtube_dl/extractor/pietsmiet.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index 99a3c32fc..3e0b1663c 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -47,6 +47,7 @@ class PietsmietIE(OnceIE): http_video = data_video['sources'][1] label = http_video.get('label') + format_height = 0 if label: # Calculate resolution for HTTP format but should always be 1280x720 @@ -55,17 +56,17 @@ class PietsmietIE(OnceIE): default=720, fatal=False) format_height = int_or_none(format_height_raw) - if format_height: - format_width = float(format_height) * (16 / 9) + if format_height > 0: + format_width = float(format_height) * (16 / 9) - formats.append({ - 'url': "https:{0}".format(http_video['file']), - 'ext': http_video.get('type'), - 'format_id': 'http-{0}'.format(label), - 'width': int_or_none(format_width), - 'height': format_height, - 'fps': 30.0, - }) + formats.append({ + 'url': "https:{0}".format(http_video['file']), + 'ext': http_video.get('type'), + 'format_id': 'http-{0}'.format(label), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) else: formats.append({ 'url': "https:{0}".format(http_video['file']),