[20detik] Extract 20detik embeds

This commit is contained in:
Surya Oktafendri 2018-04-03 10:08:11 +07:00
parent 79356eb3ee
commit b2805ca141
No known key found for this signature in database
GPG Key ID: 8CAB076E32F1FC8D
3 changed files with 60 additions and 5 deletions

View File

@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class DuaPuluhDetikIE(InfoExtractor):
    """Extractor for embedded 20.detik.com players.

    The embed page carries the HLS manifest location in a ``videoUrl``
    key inside the page source; title, description and thumbnail are
    read from the page's Open Graph tags.
    """

    _VALID_URL = r'https?://20\.detik\.com/embed/(?P<video_id>\d+)'
    IE_NAME = '20detik'
    _TESTS = [{
        'url': 'https://20.detik.com/embed/180403001?autostart=1',
        'info_dict': {
            'id': '180403001',
            'title': 'Dahsyatnya Rudal Anti-balistik yang Diuji Coba Rusia',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://20.detik.com/embed/180326044',
        'info_dict': {
            'id': '180326044',
            'title': 'md5:204cbc0b3b51b701ee9dc6a502f1e17b',
            'description': 'md5:227d860110eda61876b243e23fe38538',
            'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the embed page at *url*.

        Raises ExtractorError when the page contains no ``videoUrl`` entry.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # The value is matched up to the next quote character only; a greedy
        # ``.+`` would swallow everything up to the last quote on the line.
        # ``default=None`` makes a missing match return None instead of
        # raising inside the helper, so the explicit check below is reachable.
        m3u8_url = self._html_search_regex(
            r'["\']videoUrl["\']\s*:\s*["\'](?P<m3u8_url>[^"\']+)["\']',
            webpage, 'm3u8_url', default=None, group='m3u8_url')
        if m3u8_url is None:
            raise ExtractorError('Video not found')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage, default='')
        # The thumbnail is optional metadata: use the non-fatal helper so a
        # missing og:image does not abort extraction of a playable video.
        thumbnail = self._og_search_thumbnail(webpage)

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -258,6 +258,7 @@ from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .democracynow import DemocracynowIE
from .detik import DuaPuluhDetikIE
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE

View File

@ -3155,11 +3155,6 @@ class GenericIE(InfoExtractor):
if embed_url and embed_url != url:
return self.url_result(embed_url)
if not found:
# DetikFlow: It's basically a 'modified' FlowPlayer used in https://20.detik.com
found = re.findall(
r'["\']videoUrl["\']\s*:\s*["\']([^"\']+)["\']', webpage)
if not found:
raise UnsupportedError(url)