[yle] Add extractor YLEAreena
This commit is contained in:
parent
10d6c7e1b7
commit
43e449f2e9
@ -1322,6 +1322,9 @@ from .yandexmusic import (
|
||||
from .yandexdisk import YandexDiskIE
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .yle import (
|
||||
YLEAreenaIE,
|
||||
)
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import (
|
||||
|
242
youtube_dl/extractor/yle.py
Normal file
242
youtube_dl/extractor/yle.py
Normal file
@ -0,0 +1,242 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cfb_decrypt,
|
||||
BLOCK_SIZE_BYTES,
|
||||
)
|
||||
|
||||
|
||||
class YLEAreenaIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?P<host>areena|arenan)\.yle\.fi/(?:|tv/(?P<live>suorat|direkt)/)(?P<id>[^/?#]+)(?:[?#].+)?$'
|
||||
_PROTOCOLS = ['RTMPE', 'HDS', 'HLS']
|
||||
_AES_KEY = 'yjuap4n5ok9wzg43'
|
||||
_LIMIT = 50
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://areena.yle.fi/1-2825412',
|
||||
'md5': 'c9a0d9cf91a0596126531d6e91114dff',
|
||||
'info_dict': {
|
||||
'id': '6-c0a4c751a4c84dbca2292d2dc4066957',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150525',
|
||||
'description': 'md5:e71da182216dd2e350f9009872c7d72c',
|
||||
'title': 'Ylen aamu-tv: Näin liputat oikein',
|
||||
'timestamp': 1432529700,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://areena.yle.fi/1-3238941',
|
||||
'info_dict': {
|
||||
'id': '1-3238941',
|
||||
'title': 'Yle News',
|
||||
'description': 'md5:2f7abf6497f6c447c1a3f757b8b0851d',
|
||||
},
|
||||
'playlist_mincount': 25,
|
||||
},
|
||||
{
|
||||
'url': 'http://areena.yle.fi/tv/suorat/yle-tv1',
|
||||
'info_dict': {
|
||||
'title': 're:^Yle TV1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'id': '10-1',
|
||||
'ext': 'mp4',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
lang = 'fi'
|
||||
if mobj.group('host') == 'arenan':
|
||||
lang = 'sv'
|
||||
|
||||
if mobj.group('live'):
|
||||
program_url = 'http://player.yle.fi/api/v1/services.jsonp?id={pid}®ion=fi'
|
||||
program_key = 'service'
|
||||
event_key = 'outlet'
|
||||
is_live = True
|
||||
else:
|
||||
program_url = 'http://player.yle.fi/api/v1/programs.jsonp?id={pid}'
|
||||
program_key = 'program'
|
||||
event_key = 'publicationEvent'
|
||||
is_live = False
|
||||
data = []
|
||||
|
||||
for i in itertools.count():
|
||||
path = '/api/programs/v1/items.json?' \
|
||||
'series=%s&type=program&availability=ondemand&' \
|
||||
'order=publication.starttime%%3Adesc&app_id=89868a18&' \
|
||||
'app_key=54bb4ea4d92854a2a45e98f961f0d7da&' \
|
||||
'limit=%d&offset=%d&olang=fi' % (
|
||||
display_id, self._LIMIT, i * self._LIMIT)
|
||||
|
||||
series_url = compat_urlparse.urljoin(url, path)
|
||||
json_data = self._download_json(
|
||||
series_url, display_id, 'Downloading page %d' % (i + 1),
|
||||
fatal=False)
|
||||
|
||||
if json_data is None:
|
||||
break
|
||||
|
||||
page_data = json_data.get('data', [])
|
||||
data.extend(page_data)
|
||||
|
||||
if len(page_data) < self._LIMIT:
|
||||
break
|
||||
if data:
|
||||
series = data[0].get('partOfSeries', {})
|
||||
urls = []
|
||||
for clip in data:
|
||||
clip_id = clip.get('id')
|
||||
title = clip.get('title', {}).get(lang, '')
|
||||
item_title = clip.get('itemTitle', {}).get(lang, '')
|
||||
if item_title:
|
||||
title = '%s-%s' % (title, item_title)
|
||||
urls.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, '/' + clip_id),
|
||||
video_id=clip_id,
|
||||
video_title=title))
|
||||
|
||||
return self.playlist_result(
|
||||
urls, playlist_id=display_id,
|
||||
playlist_title=series.get('title', {}).get(lang),
|
||||
playlist_description=series.get('description', {}).get(lang))
|
||||
|
||||
clip_info = self._download_json(
|
||||
program_url.format(pid=display_id), display_id)
|
||||
program_info = clip_info.get('data', {}).get(program_key, {})
|
||||
events = program_info.get(event_key, [])
|
||||
events_with_media = [e for e in events if e.get('media', {})]
|
||||
|
||||
if not events_with_media:
|
||||
return
|
||||
|
||||
media_id = events_with_media[0]['media'].get('id')
|
||||
formats, subtitles = self._extract_formats(media_id)
|
||||
self._sort_formats(formats)
|
||||
title = program_info.get('title', {}).get(lang, '')
|
||||
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'episode': program_info.get('itemTitle', {}).get(lang),
|
||||
'episode_number': int_or_none(program_info.get('episodeNumber')),
|
||||
'season_number': int_or_none(program_info.get('partOfSeason', {}).get('seasonNumber')),
|
||||
'series': program_info.get('partOfSeries', {}).get('title', {}).get(lang),
|
||||
'description': program_info.get('description', {}).get(lang),
|
||||
'formats': formats,
|
||||
'duration': parse_duration(program_info.get('duration')),
|
||||
'timestamp': parse_iso8601(events_with_media[0].get('startTime')),
|
||||
'subtitles': subtitles,
|
||||
'age_limit': program_info.get('contentRating', {}).get('ageRestriction', 0),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_id):
|
||||
media_infos = []
|
||||
subtitles = []
|
||||
for protocol in self._PROTOCOLS:
|
||||
media_url = 'http://player.yle.fi/api/v1/media.jsonp?id=%s&' \
|
||||
'protocol=%s&client=areena-flash-player' % (
|
||||
media_id, protocol)
|
||||
media_response = self._download_json(media_url, media_id)
|
||||
media_infos.extend(media_response['data']['media'][protocol])
|
||||
|
||||
has_subtitles = [i for i in media_infos if i.get('subtitles')]
|
||||
|
||||
formats = []
|
||||
for fmt in media_infos:
|
||||
if has_subtitles and 'hardsubtitle' in fmt:
|
||||
continue
|
||||
|
||||
url = self._decrypt_data(fmt['url']).decode('utf-8')
|
||||
sub_list = fmt.get('subtitles', [])
|
||||
|
||||
extracted_subtitles = self.extract_subtitles(sub_list)
|
||||
if subtitles:
|
||||
if subtitles != extracted_subtitles:
|
||||
raise ExtractorError(
|
||||
'Different formats return different subtitle urls')
|
||||
else:
|
||||
subtitles = extracted_subtitles
|
||||
if fmt['protocol'] == 'HDS':
|
||||
sep = '&' if '?' in url else '?'
|
||||
url += sep + 'hdcore=3.3.0&plugin=flowplayer-3.3.0.0'
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
url, media_id, f4m_id='hds', fatal=False))
|
||||
continue
|
||||
if fmt['protocol'] == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, media_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
abr = fmt.get('audioBitrateKbps', 0)
|
||||
vbr = fmt.get('videoBitrateKbps', 0)
|
||||
tbr = vbr + abr
|
||||
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%d' % tbr if tbr else 'rtmp',
|
||||
'url': url,
|
||||
'ext': 'flv',
|
||||
'abr': abr if abr else None,
|
||||
'vbr': vbr if vbr else None,
|
||||
'tbr': tbr if tbr else None,
|
||||
'height': fmt.get('height'),
|
||||
'width': fmt.get('width'),
|
||||
'rtmp_live': fmt.get('live'),
|
||||
'protocol': 'rtmp',
|
||||
'preference': -1,
|
||||
})
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _get_subtitles(self, sub_list):
|
||||
subtitles = {}
|
||||
for sub in sub_list:
|
||||
subtitles[sub['lang']] = [{
|
||||
'ext': 'srt',
|
||||
'url': sub['uri'],
|
||||
}]
|
||||
return subtitles
|
||||
|
||||
def _decrypt_data(self, data):
|
||||
data = bytes_to_intlist(base64.b64decode(data))
|
||||
key = bytes_to_intlist(self._AES_KEY.encode('utf-8'))
|
||||
|
||||
iv = data[:BLOCK_SIZE_BYTES]
|
||||
cipher = data[BLOCK_SIZE_BYTES:]
|
||||
decrypted_data = aes_cfb_decrypt(cipher, key, iv)
|
||||
|
||||
plaintext = intlist_to_bytes(decrypted_data)
|
||||
|
||||
return plaintext
|
Loading…
x
Reference in New Issue
Block a user