[TVN24] Add new extractor

This commit is contained in:
Jakub Wilk 2017-01-11 18:49:40 +01:00
parent 365d136b7c
commit 3e44bb53aa
2 changed files with 48 additions and 0 deletions

View File

@ -984,6 +984,7 @@ from .tvc import (
) )
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvland import TVLandIE from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvnoe import TVNoeIE from .tvnoe import TVNoeIE
from .tvp import ( from .tvp import (
TVPEmbedIE, TVPEmbedIE,

View File

@ -0,0 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class TVN24IE(InfoExtractor):
    """Extractor for video article pages on tvn24.pl, fakty.tvn24.pl and tvn24bis.pl.

    The page metadata is read from the Open Graph tags and from the
    ``data-poster``, ``data-share-params`` and ``data-quality`` HTML
    attributes found in the downloaded page.
    """

    # Both http and https page URLs are accepted.
    _VALID_URL = r'https?://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
        'md5': 'fbdec753d7bc29d96036808275f2130c',
        'info_dict': {
            'id': '1584444',
            'ext': 'mp4',
            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
            'thumbnail': 're:http://.*[.]jpeg',
        }
    }

    @staticmethod
    def _parse_height(format_id):
        """Return the integer height encoded in a label such as '720p'.

        Returns None when the label, after stripping trailing 'p'
        characters, is not a plain integer, so that an unexpected quality
        label does not abort the whole extraction.
        """
        try:
            return int(format_id.rstrip('p'))
        except ValueError:
            return None

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The video id comes from the ``id`` key of the JSON stored in
        ``data-share-params``; the formats come from the JSON object stored
        in ``data-quality``, whose keys are used as format ids and whose
        values are used as media URLs.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        # The poster is optional metadata: a page without it should still
        # be extractable, hence fatal=False.
        thumbnail = self._html_search_regex(
            r'\bdata-poster="(.+?)"', webpage, 'data-poster', fatal=False)

        share_params = self._parse_json(
            self._html_search_regex(
                r'\bdata-share-params="(.+?)"', webpage, 'data-share-params'),
            page_id)
        video_id = share_params['id']

        quality_data = self._parse_json(
            self._html_search_regex(
                r'\bdata-quality="(.+?)"', webpage, 'data-quality'),
            page_id)

        # Use a dedicated name for the media URL so the ``url`` parameter
        # (the page URL) is not shadowed.
        formats = [{
            'format_id': format_id,
            'height': self._parse_height(format_id),
            'url': format_url,
            'ext': 'mp4',
        } for format_id, format_url in quality_data.items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }