From b2805ca14109e2e4eed3ed99a70bde43f3fa9671 Mon Sep 17 00:00:00 2001
From: Surya Oktafendri
Date: Tue, 3 Apr 2018 10:08:11 +0700
Subject: [PATCH] [20detik] Extract 20detik embeds

---
 youtube_dl/extractor/detik.py      | 68 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/generic.py    |  5 ---
 3 files changed, 69 insertions(+), 5 deletions(-)
 create mode 100644 youtube_dl/extractor/detik.py

diff --git a/youtube_dl/extractor/detik.py b/youtube_dl/extractor/detik.py
new file mode 100644
index 000000000..e97745acd
--- /dev/null
+++ b/youtube_dl/extractor/detik.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class DuaPuluhDetikIE(InfoExtractor):
+    """Extract videos served by the 20.detik.com embed player."""
+
+    _VALID_URL = r'https?://20\.detik\.com/embed/(?P<video_id>\d+)'
+    IE_NAME = '20detik'
+    _TESTS = [{
+        'url': 'https://20.detik.com/embed/180403001?autostart=1',
+        'info_dict': {
+            'id': '180403001',
+            'title': 'Dahsyatnya Rudal Anti-balistik yang Diuji Coba Rusia',
+            'description': '',
+            'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
+            'ext': 'mp4'
+        }
+    }, {
+        'url': 'https://20.detik.com/embed/180326044',
+        'info_dict': {
+            'id': '180326044',
+            'title': 'md5:204cbc0b3b51b701ee9dc6a502f1e17b',
+            'description': 'md5:227d860110eda61876b243e23fe38538',
+            'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
+            'ext': 'mp4'
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the embed page at ``url``.
+
+        Raises ExtractorError when the page has no ``videoUrl`` entry.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+        webpage = self._download_webpage(url, video_id)
+
+        # The page exposes the m3u8 manifest URL as a quoted "videoUrl" value.
+        # Stop at the first closing quote so that other quoted values on the
+        # same line are not swallowed into the URL.
+        m3u8_url = self._html_search_regex(
+            r'["\']videoUrl["\']\s*:\s*["\'](?P<m3u8_url>[^"\']+)["\']',
+            webpage, 'm3u8_url', default=None)
+        if m3u8_url is None:
+            raise ExtractorError('Video not found')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default='')
+        # The thumbnail is optional metadata; its absence must not be fatal.
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index de48a37ad..2aa369663 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -258,6 +258,7 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .democracynow import DemocracynowIE
+from .detik import DuaPuluhDetikIE
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index db13d9968..e9fde09e6 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -3155,11 +3155,6 @@ class GenericIE(InfoExtractor):
             if embed_url and embed_url != url:
                 return self.url_result(embed_url)
 
-        if not found:
-            # DetikFlow: It's basically a 'modified' FlowPlayer used in https://20.detik.com
-            found = re.findall(
-                r'["\']videoUrl["\']\s*:\s*["\']([^"\']+)["\']', webpage)
-
         if not found:
             raise UnsupportedError(url)
 