diff --git a/youtube_dl/extractor/detik.py b/youtube_dl/extractor/detik.py deleted file mode 100644 index 6347017e5..000000000 --- a/youtube_dl/extractor/detik.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class DuaPuluhDetikIE(InfoExtractor): - _VALID_URL = r'https?://20\.detik\.com/[^/]+/(?P<date>\d+)-(?P<id>\d+)/(?P<slug>[^/?#&]+)' - IE_NAME = '20detik' - _TESTS = [{ - 'url': 'https://20.detik.com/detikflash/20180328-180328002/dramatis-polisi-selamatkan-pria-yang-coba-bunuh-diri', - 'info_dict': { - 'id': '180328002', - 'display_id': '20180328-180328002', - 'slug': 'dramatis-polisi-selamatkan-pria-yang-coba-bunuh-diri', - 'upload_date': '20180328', - 'title': 'md5:92c18d820d8937f259007e9c6ce40e6b', - 'description': 'md5:3953164fc1746eb98aa3729140f9b5b8', - 'thumbnail': r're:^https?://.*\.jpg(\?.*)?$', - 'ext': 'mp4' - } - }, { - 'url': 'https://20.detik.com/e-flash/20180328-180328009/unboxing-huawei-p20-pro-', - 'only_matching': True - }, { - 'url': 'https://20.detik.com/otobuzz/20180228-180228081/primadona-baru-di-kelas-low-mpv', - 'only_matching': True - }, { - 'url': 'https://20.detik.com/sport-buzz/20180328-180328013/messi-kabur-melihat-argentina-dibantai-spanyol', - 'only_matching': True - }, { - 'url': 'https://20.detik.com/piala-dunia-2018/20180328-180328005/gary-lineker-dan-memori-piala-dunia-1986', - 'only_matching': True - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - upload_date, video_id = mobj.group('date', 'id') - embed_url = 'https://20.detik.com/embed/%s' % video_id - display_id = "%s-%s" % (upload_date, video_id) - webpage = self._download_webpage(embed_url, video_id) - m3u8_url = self._html_search_regex( - r'''["\']videoUrl["\']\s*:\s*["\'](?P<m3u8_url>.+)["\']''', - webpage, 'm3u8_url', default='') - if len(m3u8_url) == 0: - raise ExtractorError('Video not found') - title = 
self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_property('image', webpage) - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native') - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'upload_date': upload_date, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2aa369663..de48a37ad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -258,7 +258,6 @@ from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .democracynow import DemocracynowIE -from .detik import DuaPuluhDetikIE from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index cf64398e3..db13d9968 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1967,6 +1967,15 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, + { + # 20detik embed + 'url': 'https://20.detik.com/detikflash/20180328-180328002/dramatis-polisi-selamatkan-pria-yang-coba-bunuh-diri', + 'info_dict': { + 'id': '180328002', + 'title': 'md5:92c18d820d8937f259007e9c6ce40e6b', + 'ext': 'mp4' + } } # { # # TODO: find another test @@ -2829,6 +2838,13 @@ class GenericIE(InfoExtractor): }) return info + # Look for 20detik (https://20.detik.com) embeds + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>https?://20\.detik\.com/embed/(\d+)[^"\']+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: @@ -3139,6 +3155,11 @@ class GenericIE(InfoExtractor): if embed_url and embed_url != url: return 
self.url_result(embed_url) + if not found: + # DetikFlow: It's basically a 'modified' FlowPlayer used in https://20.detik.com + found = re.findall( + r'["\']videoUrl["\']\s*:\s*["\']([^"\']+)["\']', webpage) + if not found: raise UnsupportedError(url)