[Generic] Move 20detik embed extractor into generic extractor

This commit is contained in:
Surya Oktafendri 2018-03-31 09:04:14 +07:00
parent 662d14ea7e
commit 14f4c41cef
No known key found for this signature in database
GPG Key ID: 8CAB076E32F1FC8D
3 changed files with 21 additions and 66 deletions

View File

@ -1,65 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class DuaPuluhDetikIE(InfoExtractor):
_VALID_URL = r'https?://20\.detik\.com/[^/]+/(?P<date>\d+)-(?P<id>\d+)/(?P<slug>[^/?#&]+)'
IE_NAME = '20detik'
_TESTS = [{
'url': 'https://20.detik.com/detikflash/20180328-180328002/dramatis-polisi-selamatkan-pria-yang-coba-bunuh-diri',
'info_dict': {
'id': '180328002',
'display_id': '20180328-180328002',
'slug': 'dramatis-polisi-selamatkan-pria-yang-coba-bunuh-diri',
'upload_date': '20180328',
'title': 'md5:92c18d820d8937f259007e9c6ce40e6b',
'description': 'md5:3953164fc1746eb98aa3729140f9b5b8',
'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
'ext': 'mp4'
}
}, {
'url': 'https://20.detik.com/e-flash/20180328-180328009/unboxing-huawei-p20-pro-',
'only_matching': True
}, {
'url': 'https://20.detik.com/otobuzz/20180228-180228081/primadona-baru-di-kelas-low-mpv',
'only_matching': True
}, {
'url': 'https://20.detik.com/sport-buzz/20180328-180328013/messi-kabur-melihat-argentina-dibantai-spanyol',
'only_matching': True
}, {
'url': 'https://20.detik.com/piala-dunia-2018/20180328-180328005/gary-lineker-dan-memori-piala-dunia-1986',
'only_matching': True
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
upload_date, video_id = mobj.group('date', 'id')
embed_url = 'https://20.detik.com/embed/%s' % video_id
display_id = "%s-%s" % (upload_date, video_id)
webpage = self._download_webpage(embed_url, video_id)
m3u8_url = self._html_search_regex(
r'''["\']videoUrl["\']\s*:\s*["\'](?P<m3u8_url>.+)["\']''',
webpage, 'm3u8_url', default='')
if len(m3u8_url) == 0:
raise ExtractorError('Video not found')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_property('image', webpage)
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native')
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'upload_date': upload_date,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats
}

View File

@ -258,7 +258,6 @@ from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .democracynow import DemocracynowIE
from .detik import DuaPuluhDetikIE
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE

View File

@ -1967,6 +1967,15 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
},
{
# 20detik embed
'url': 'https://20.detik.com/detikflash/20180328-180328002/dramatis-polisi-selamatkan-pria-yang-coba-bunuh-diri',
'info_dict': {
'id': '180328002',
'title': 'md5:92c18d820d8937f259007e9c6ce40e6b',
'ext': 'mp4'
}
}
# {
# # TODO: find another test
@ -2829,6 +2838,13 @@ class GenericIE(InfoExtractor):
})
return info
# Look for 20detik (https://20.detik.com) embeds
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://20\.detik\.com/embed/(\d+)[^"\']+?)\1',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if instagram_embed_url is not None:
@ -3139,6 +3155,11 @@ class GenericIE(InfoExtractor):
if embed_url and embed_url != url:
return self.url_result(embed_url)
if not found:
# DetikFlow: It's basically a 'modified' FlowPlayer used in https://20.detik.com
found = re.findall(
r'["\']videoUrl["\']\s*:\s*["\']([^"\']+)["\']', webpage)
if not found:
raise UnsupportedError(url)