[tvnplayer] Add extractor

2017-03-19 01:04:24 +01:00 · 2017-03-19 01:04:24 +01:00 · fc8e22df8f
commit fc8e22df8f
parent f68ef1e2ab
2 changed files with 136 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1028,6 +1028,11 @@ from .tvigle import TvigleIE
 from .tvland import TVLandIE
 from .tvn24 import TVN24IE
 from .tvnoe import TVNoeIE
+from .tvnplayer import (
+    TVNIE,
+    TVNPlayerIE,
+    TVNPlayerSeriesIE,
+)
 from .tvp import (
    TVPEmbedIE,
    TVPIE,
--- a/youtube_dl/extractor/tvnplayer.py
+++ b/youtube_dl/extractor/tvnplayer.py
@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class TVNIE(InfoExtractor):
+    IE_NAME = 'tvn'
+    _VALID_URL = r'https?://(?:[^/]+\.)?tvn\.pl/.*?,(?P<id>\d+),o\.html'
+
+    _TEST = {
+        'url': 'http://nawspolnej.tvn.pl/odcinki-online/odcinek-2440,71587,o.html',
+        'md5': '1de06bc87774c334ac473f79ee4a5719',
+        'info_dict': {
+            'id': '70618',
+            'ext': 'mp4',
+            'title': 'Na Wspólnej, odc. 2440',
+            'description': 'md5:73b915646094286a6b3c159c79c67e38',
+        },
+    }
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+        video_id = self._search_regex([
+            r'/player.pl/.*?,(\d+),autoplay.html',
+            r'var\s+var_id\s+=\s+[\'"](\d+)'], webpage, 'video id')
+        return self.url_result('http://player.pl/,%s.html' % video_id, ie=TVNPlayerIE.ie_key())
+
+
+class TVNPlayerIE(InfoExtractor):
+    IE_NAME = 'tvnplayer'
+    _VALID_URL = r'https?://player\.pl/.*?,(?P<id>\d+)(?:,\w+)?\.html'
+
+    _TEST = {
+        'url': 'http://player.pl/seriale-online/na-wspolnej-odcinki,144/odcinek-2436,S00E2436,70614.html',
+        'md5': '1e1457420045d36c488c94cf35d8e9cb',
+        'info_dict': {
+            'id': '70614',
+            'ext': 'mp4',
+            'title': 'Na Wspólnej, odc. 2436',
+            'description': 'md5:651cbbc050c3716200cb3f61c24e85b5',
+        },
+    }
+
+
+    def determine_bitrate(self, url):
+        str_bitrate = url.partition("?")[0].rpartition('.')[0].rpartition("_")[-1]
+        return int(str_bitrate)/1000
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_url = 'http://player.pl/api/?platform=ConnectedTV&terminal=Panasonic&format=json&authKey=064fda5ab26dc1dd936f5c6e84b7d3c2&v=3.1&m=getItem&id='
+        json_data = self._download_json(json_url + video_id, video_id)
+        item = json_data['item']
+        title = item['serie_title']
+        description = item['lead']
+        if item['season'] > 0:
+            title += ', sezon ' + str(item['season'])
+        if item['episode'] > 0:
+            title += ', odc. ' + str(item['episode'])
+        formats = []
+        for video in item['videos']['main']['video_content']:
+            video_url = video['url']
+            formats.append({
+                'format_id': video['profile_name'],
+                'url': video_url,
+                'ext': determine_ext(video_url, 'mp4'),
+                'tbr': self.determine_bitrate(video_url),
+                'description': description
+            })
+
+        self._sort_formats(formats)
+
+        # ISM manifest, doesn't work
+        '''
+        json_data = self._download_json('http://player.pl/playlist-vod/' + video_id + '.json', video_id)
+        for movie in json_data['playlist']['movies'].values():
+          if type(movie).__name__ == 'dict' and 'episode_id' in movie and video_id == movie['episode_id']:
+            title = movie['title']
+            if not movie['one_episode']:
+              title += ", odc. " + movie['episode']
+            formats = self._extract_ism_formats(movie['profiles']['wv']['url'], video_id)
+            formats = self._extract_ism_formats(movie['profiles']['row']['url'], video_id)
+        '''
+
+        extracted_info = {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+            'season_id': item['season'],
+            'episode_id': item['episode'],
+        }
+
+        return extracted_info
+
+
+class TVNPlayerSeriesIE(InfoExtractor):
+    IE_NAME = 'tvnplayer:series'
+    _VALID_URL = r'https?://player\.pl/.*?-odcinki,(?P<id>\d+)/(?:\?.*)?$'
+
+    _TESTS = [{
+        'url': 'http://player.pl/seriale-online/brzydula-odcinki,52/?player&pl_source=pop-up&pl_campaign=logCRM#',
+        'info_dict': {
+            'title': 'Brzydula',
+            'id': '52',
+        },
+        'playlist_count': 235,
+    }, {
+        'url': 'http://player.pl/seriale-online/singielka-odcinki,3784/',
+        'info_dict': {
+            'title': 'Singielka',
+            'id': '3784',
+        },
+        'playlist_count': 198,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage('http://player.pl/informacje,p,%s.html?noAds=1&ajax=1' % playlist_id, playlist_id)
+
+        playlist_title = self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'series title')
+
+        videos_ids = re.findall(r'data-article-id\s*=\s*["\'](\d+).*?class\s*=\s*["\']play', webpage)
+        entries = [self.url_result('http://player.pl/,%s.html' % video_id, ie=TVNPlayerIE.ie_key()) for video_id in videos_ids]
+
+        return self.playlist_result(entries, playlist_id, playlist_title)