[tvnplayer] Add extractor

This commit is contained in:
Tithen-Firion 2017-03-19 01:04:24 +01:00
parent f68ef1e2ab
commit fc8e22df8f
2 changed files with 136 additions and 0 deletions

View File

@ -1028,6 +1028,11 @@ from .tvigle import TvigleIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvnoe import TVNoeIE
from .tvnplayer import (
TVNIE,
TVNPlayerIE,
TVNPlayerSeriesIE,
)
from .tvp import (
    TVPEmbedIE,
    TVPIE,

View File

@ -0,0 +1,131 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import determine_ext
class TVNIE(InfoExtractor):
    """Resolve an episode page on a tvn.pl (sub)domain to its player.pl video.

    The page itself is only scraped for the numeric video id; the actual
    extraction is delegated to TVNPlayerIE through a ``url_result``.
    """

    IE_NAME = 'tvn'
    _VALID_URL = r'https?://(?:[^/]+\.)?tvn\.pl/.*?,(?P<id>\d+),o\.html'
    _TEST = {
        'url': 'http://nawspolnej.tvn.pl/odcinki-online/odcinek-2440,71587,o.html',
        'md5': '1de06bc87774c334ac473f79ee4a5719',
        'info_dict': {
            # The video id differs from the page id embedded in the URL.
            'id': '70618',
            'ext': 'mp4',
            'title': 'Na Wspólnej, odc. 2440',
            'description': 'md5:73b915646094286a6b3c159c79c67e38',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and hand its video id to TVNPlayerIE."""
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        # Tried in order: a link to the player page, then the inline
        # ``var_id`` JavaScript variable.
        id_patterns = [
            r'/player.pl/.*?,(\d+),autoplay.html',
            r'var\s+var_id\s+=\s+[\'"](\d+)',
        ]
        player_video_id = self._search_regex(id_patterns, html, 'video id')

        player_url = 'http://player.pl/,%s.html' % player_video_id
        return self.url_result(player_url, ie=TVNPlayerIE.ie_key())
class TVNPlayerIE(InfoExtractor):
    """Extract a single video from player.pl via its ``getItem`` JSON API."""

    IE_NAME = 'tvnplayer'
    _VALID_URL = r'https?://player\.pl/.*?,(?P<id>\d+)(?:,\w+)?\.html'
    _TEST = {
        'url': 'http://player.pl/seriale-online/na-wspolnej-odcinki,144/odcinek-2436,S00E2436,70614.html',
        'md5': '1e1457420045d36c488c94cf35d8e9cb',
        'info_dict': {
            'id': '70614',
            'ext': 'mp4',
            'title': 'Na Wspólnej, odc. 2436',
            'description': 'md5:651cbbc050c3716200cb3f61c24e85b5',
        },
    }

    def determine_bitrate(self, url):
        """Derive a bitrate from the numeric suffix of a media URL.

        The query string and file extension are dropped, and the text after
        the last underscore is parsed as an integer and divided by 1000
        (presumably converting bit/s to kbit/s -- confirm against the API).

        Returns a float, or None when the suffix is not a number, so that a
        single oddly named file does not abort the whole extraction.
        """
        str_bitrate = url.partition('?')[0].rpartition('.')[0].rpartition('_')[-1]
        try:
            # Explicit float division: plain ``/`` truncates on Python 2 but
            # not on Python 3.
            return int(str_bitrate) / 1000.0
        except ValueError:
            return None

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Raises KeyError when the API response lacks the item, its series
        title, or its list of video files.
        """
        video_id = self._match_id(url)
        json_url = 'http://player.pl/api/?platform=ConnectedTV&terminal=Panasonic&format=json&authKey=064fda5ab26dc1dd936f5c6e84b7d3c2&v=3.1&m=getItem&id='
        json_data = self._download_json(json_url + video_id, video_id)
        item = json_data['item']

        title = item['serie_title']
        # Season and episode only decorate the title, so treat them as optional.
        season = item.get('season')
        episode = item.get('episode')
        if season is not None and season > 0:
            title += ', sezon %s' % season
        if episode is not None and episode > 0:
            title += ', odc. %s' % episode

        formats = []
        for video in item['videos']['main']['video_content']:
            video_url = video['url']
            formats.append({
                'format_id': video['profile_name'],
                'url': video_url,
                'ext': determine_ext(video_url, 'mp4'),
                'tbr': self.determine_bitrate(video_url),
            })
        self._sort_formats(formats)

        # NOTE: http://player.pl/playlist-vod/<id>.json also lists ISM
        # manifests (under each movie's 'profiles'), but an earlier attempt
        # to extract them was recorded as not working, so only the direct
        # files above are used.

        return {
            'id': video_id,
            'title': title,
            # The description belongs to the video as a whole, not to each
            # individual format.
            'description': item.get('lead'),
            # No thumbnail is emitted: nothing in this extractor ever read
            # one from the API response.
            'formats': formats,
            # NOTE(review): these look like season/episode *numbers* rather
            # than ids -- confirm and consider the *_number fields instead.
            'season_id': season,
            'episode_id': episode,
        }
class TVNPlayerSeriesIE(InfoExtractor):
    """Turn a player.pl series page into a playlist of its episodes.

    Each episode becomes a ``url_result`` that TVNPlayerIE resolves.
    """

    IE_NAME = 'tvnplayer:series'
    _VALID_URL = r'https?://player\.pl/.*?-odcinki,(?P<id>\d+)/(?:\?.*)?$'
    _TESTS = [{
        'url': 'http://player.pl/seriale-online/brzydula-odcinki,52/?player&pl_source=pop-up&pl_campaign=logCRM#',
        'info_dict': {
            'title': 'Brzydula',
            'id': '52',
        },
        'playlist_count': 235,
    }, {
        'url': 'http://player.pl/seriale-online/singielka-odcinki,3784/',
        'info_dict': {
            'title': 'Singielka',
            'id': '3784',
        },
        'playlist_count': 198,
    }]

    def _real_extract(self, url):
        """Scrape the series info page and return a playlist result."""
        series_id = self._match_id(url)

        # The listing is fetched from a separate info endpoint keyed by the
        # series id, not from the URL the user supplied.
        info_url = 'http://player.pl/informacje,p,%s.html?noAds=1&ajax=1' % series_id
        html = self._download_webpage(info_url, series_id)

        series_title = self._html_search_regex(
            r'<h1>([^<]+)</h1>', html, 'series title')

        # An article id counts only when a class starting with "play"
        # follows it on the same line.
        episode_ids = re.findall(
            r'data-article-id\s*=\s*["\'](\d+).*?class\s*=\s*["\']play', html)

        entries = []
        for episode_id in episode_ids:
            episode_url = 'http://player.pl/,%s.html' % episode_id
            entries.append(
                self.url_result(episode_url, ie=TVNPlayerIE.ie_key()))

        return self.playlist_result(entries, series_id, series_title)