Elan Ruusamäe 816c8a592d [kanal2]: extractor rewritten to use json
some scenarios dropped, test urls missing
2019-09-27 00:22:24 +03:00

118 lines
4.6 KiB
Python

# coding: utf-8
from __future__ import unicode_literals

import calendar
import re
import time
from datetime import datetime

from .common import InfoExtractor
from ..utils import ExtractorError
class Kanal2IE(InfoExtractor):
    """Extractor for Kanal 2 videos served from ``*.postimees.ee``.

    The video id is taken from the ``id`` query parameter of the page URL.
    Metadata comes from the player's JSON playlist endpoint; stream URLs
    additionally need a session id obtained from the session service.
    """

    # Matches the trailing "(DD.MM.YYYY HH:MM)" part of an episode subtitle.
    SUBTITLE_DATE_RE = re.compile(r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$')
    _VALID_URL = r'(?P<base>https?://.+\.postimees\.ee)[a-zA-Z0-9/._-]+\?[a-zA-Z0-9=&._-]*id=(?P<id>[a-zA-Z0-9_-]+)[^ ]*'
    _TESTS = [
        {
            # Standard url
            'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
            'md5': 'cecaf3e17706d725b1f23e886b67f8d3',
            'info_dict': {
                'id': '40792',
                'ext': 'mp4',
                'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
                'thumbnail': 'https://kanal-dl.babahhcdn.com/kanal/2016/08/05/0053_HNqKsIA/img/2.jpg',
                'description': 'Aedniku aabits" on saade, mis pakub kaasaelamist ja teadmisi nii algajatele, kui juba kogenud rohenäppudele. Kõik alates vajalikest näpunäidetest, nutikatest lahendustest, uudistoodetest kuni taimede hingeeluni ning aias kasutatava tehnikani välja.',
                'upload_date': '20160805',
                'timestamp': 1470416400,
            }
        },
        {
            # Other url example
            'url': 'http://kanal2.postimees.ee/pluss/preview?id=40744',
            'md5': 'e1dcc6e39d17a3f04749a8158db26377',
            'info_dict': {
                'id': '40744',
                'ext': 'mp4',
                'title': 'Kaunis Dila / Osa 50 (10.08.2016 19:00)',
                'thumbnail': 'https://kanal-dl.babahhcdn.com/kanal/2018/12/05/16_300_00208_0050-Kaunis_Dila_hamdY9I/img/2.jpg',
                'description': u'Riza ei tea, mis oht teda ja ta pere Selcuki n\xe4ol varitseb. Azer kahtlustab, et Fatma elus on uus mees ja on valmis k\xf5igeks, et ta endale tagasi v\xf5ita. See tekitab aga Arzus suurt hirmu.',
                'timestamp': 1470844800,
                'upload_date': '20160810',
            }
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*."""
        video_id = self._match_id(url)
        playlist = self.get_playlist(video_id)
        info = playlist['info']
        data = playlist['data']

        # Info dict field reference:
        # https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303
        return {
            'id': video_id,
            'title': self.get_title(info),
            'description': info.get('description'),
            'webpage_url': data.get('url'),
            'thumbnail': data.get('image'),
            'formats': self.get_formats(playlist, video_id),
            # The subtitle is optional; get_timestamp() copes with None.
            'timestamp': self.get_timestamp(info.get('subtitle')),
        }

    def get_title(self, info):
        """Return ``"<title> / <subtitle>"``, or just the title when the
        subtitle is missing or empty."""
        title = info['title']
        subtitle = info.get('subtitle')
        if subtitle:
            title += ' / ' + subtitle
        return title

    def get_timestamp(self, subtitle):
        """Return the UNIX timestamp encoded in *subtitle*, or None.

        The subtitle looks like ``"Osa 53 (05.08.2016 20:00)"``. The test
        fixtures show the embedded time is Estonian local time, so it is
        converted with an explicit Estonian UTC offset rather than the
        timezone of the machine running the extractor.
        """
        if not subtitle:
            return None
        match = self._search_regex(
            self.SUBTITLE_DATE_RE, subtitle, 'dateandtime', default=None)
        if not match:
            return None
        try:
            local_time = datetime.strptime(match, '%d.%m.%Y %H:%M')
        except ValueError:
            # Digits matched the pattern but do not form a real date.
            return None
        # timegm() treats the tuple as UTC; subtracting the local offset
        # yields the true UTC instant.
        return (calendar.timegm(local_time.timetuple())
                - self._estonian_utc_offset(local_time))

    @staticmethod
    def _estonian_utc_offset(local_time):
        """Return the UTC offset in seconds for a naive Estonian local time.

        Applies the EU daylight-saving rule: summer time (UTC+3) runs from
        the last Sunday of March 03:00 local to the last Sunday of October
        04:00 local, otherwise UTC+2. The repeated hour at the autumn
        switch is treated as summer time.
        NOTE(review): years in which Estonia did not follow this rule
        (reportedly 2000-2001) are not special-cased.
        """
        def last_sunday(month, hour):
            year = local_time.year
            last_day = calendar.monthrange(year, month)[1]
            # calendar.weekday(): Monday == 0 ... Sunday == 6.
            days_past_sunday = (calendar.weekday(year, month, last_day) + 1) % 7
            return datetime(year, month, last_day - days_past_sunday, hour)

        dst_start = last_sunday(3, 3)
        dst_end = last_sunday(10, 4)
        hours = 3 if dst_start <= local_time < dst_end else 2
        return hours * 3600

    def get_formats(self, playlist, video_id):
        """Return the list of format dicts for the streams in *playlist*.

        Every stream URL gets the session id appended as the ``s`` query
        parameter. Streams without a ``file`` entry are skipped.

        Raises ExtractorError when no session id could be obtained.
        """
        data = playlist['data']
        session = self.get_session(data['path'], video_id)
        sid = session.get('session')
        if not sid:
            raise ExtractorError('%s: Unable to obtain session' % self.IE_NAME)

        formats = []
        for stream in data.get('streams') or []:
            stream_url = stream.get('file')
            if not stream_url:
                continue
            formats.append({
                'protocol': 'm3u8',
                'ext': 'mp4',
                'url': '%s&s=%s' % (stream_url, sid),
            })
        return formats

    def get_playlist(self, video_id):
        """Download and return the player playlist JSON for *video_id*."""
        url = 'https://kanal2.postimees.ee/player/playlist/%s?type=episodes' % video_id
        headers = {
            'X-Requested-With': 'XMLHttpRequest',
        }
        return self._download_json(url, video_id, headers=headers)

    def get_session(self, path, video_id):
        """Register a playback session for *path* and return the response.

        *path* is sent in the ``X-Original-URI`` request header.

        Raises ExtractorError unless the response's ``reason`` is ``OK``.
        """
        url = 'https://sts.postimees.ee/session/register'
        headers = {
            'X-Original-URI': path,
            'Accept': 'application/json',
        }
        session = self._download_json(
            url, video_id, headers=headers,
            note='Creating session',
            errnote='Error creating session')
        # .get(): a response without 'reason' is a failure, not a KeyError.
        if session.get('reason') != 'OK':
            raise ExtractorError('%s: Unable to obtain session' % self.IE_NAME)
        return session