universcience 2

This commit is contained in:
flatgreen 2016-08-25 16:56:36 +02:00
parent e2e08a82d0
commit ae33167571
3 changed files with 52 additions and 11 deletions

View File

@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class CanalUIE(InfoExtractor):
    """Extractor skeleton for CanalU.

    The URL pattern, the test case and the scraping expressions below are
    still the placeholder values of the extractor template (see the TODO
    markers); they have to be replaced with values for the real site.
    """

    # Named group ``id`` captures the numeric video id from the watch URL.
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://yourextractor.com/watch/42',
        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
            # Raw literal: '\.' is not a valid escape sequence in a plain
            # string and triggers a warning on recent Python versions.
            'thumbnail': r're:^https?://.*\.jpg$',
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and build the info dict from it.

        The video id is taken from ``url``, and the title is read from the
        first ``<h1>`` element of the downloaded page.

        Returns a dict with the keys ``id``, ``title``, ``description`` and
        ``uploader``; the uploader lookup is requested with ``fatal=False``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # TODO more code goes here, for example ...
        title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'uploader': self._search_regex(
                r'<div[^>]+id="uploader"[^>]*>([^<]+)<',
                webpage, 'uploader', fatal=False),
            # TODO more properties (see youtube_dl/extractor/common.py)
        }

View File

@ -116,6 +116,7 @@ from .camdemy import (
from .camwithher import CamWithHerIE from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .canalu import CanalUIE
from .canvas import CanvasIE from .canvas import CanvasIE
from .carambatv import ( from .carambatv import (
CarambaTVIE, CarambaTVIE,

View File

@ -8,11 +8,12 @@ from ..utils import (
xpath_element, xpath_element,
xpath_attr, xpath_attr,
clean_html, clean_html,
update_url_query,
) )
class UniverscienceIE(InfoExtractor): class UniverscienceIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?universcience\.tv/video-(.*)-(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?universcience\.tv/video-.*-(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.universcience.tv/video-haro-sur-les-loups-o-5466.html', 'url': 'http://www.universcience.tv/video-haro-sur-les-loups-o-5466.html',
'info_dict': { 'info_dict': {
@ -34,8 +35,10 @@ class UniverscienceIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
APIKey = self._html_search_regex(r'["\']APIKey["\'](.*)["\'](?P<APIKey>.*)["\']', webpage, 'APIKey', group='APIKey') APIKey = self._html_search_regex(r'["\']APIKey["\'].*["\'](?P<APIKey>.*)["\']', webpage, 'APIKey', group='APIKey')
url_get_media = 'http://universcience-webtv2-services-pad.brainsonic.com/rest/getMedia?APIKey=' + APIKey + '&byMediaId=' + video_id url_get_media = update_url_query(
'http://universcience-webtv2-services-pad.brainsonic.com/rest/getMedia',
{'APIKey': APIKey, 'byMediaId': video_id})
xml = self._download_xml(url_get_media, video_id) xml = self._download_xml(url_get_media, video_id)
path_media = xpath_element(xml, './medias/media', fatal=True) path_media = xpath_element(xml, './medias/media', fatal=True)
@ -58,20 +61,18 @@ class UniverscienceIE(InfoExtractor):
for media_source in xml.findall(path_media_source): for media_source in xml.findall(path_media_source):
format_url = xpath_text(media_source, 'source', fatal=True) format_url = xpath_text(media_source, 'source', fatal=True)
media_label = xpath_attr(media_source, './streaming_type', 'label') media_label = xpath_attr(media_source, './streaming_type', 'label')
media_width = self._search_regex( media_width = int_or_none(self._search_regex(r'(\d*) x \d*', media_label, 'width', default=None))
r'.* (\d*) x \d*', media_label, 'width', default='None', fatal=False) media_height = int_or_none(self._search_regex(r'\d* x (\d*)', media_label, 'height', default=None))
media_height = self._search_regex(
r'.* \d* x (\d*)', media_label, 'height', default='None', fatal=False)
media_label = self._search_regex( media_label = self._search_regex(
r'(.*) (\d* x \d*)', media_label, 'media_label', default=media_label, fatal=False) r'(.*) (\d* x \d*)', media_label, 'media_label', default=media_label, fatal=False)
if (media_label == 'HLS') or (media_label == 'm3u8'): if media_label in ('HLS', 'm3u8'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) format_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
else: else:
format_info = { format_info = {
'width': int_or_none(media_width), 'width': media_width,
'height': int_or_none(media_height), 'height': media_height,
'tbr': int_or_none(xpath_attr(media_source, './streaming_type', 'bitrate')), 'tbr': int_or_none(xpath_attr(media_source, './streaming_type', 'bitrate')),
# 'vcodec': xpath_attr -> bug sur regexp? # 'vcodec': xpath_attr -> bug sur regexp?
'vcodec': media_source.find('streaming_type').get('html5_codec'), 'vcodec': media_source.find('streaming_type').get('html5_codec'),
@ -81,6 +82,7 @@ class UniverscienceIE(InfoExtractor):
formats.append(format_info) formats.append(format_info)
podcast_url = xpath_text(path_media, './podcast_url') podcast_url = xpath_text(path_media, './podcast_url')
if podcast_url is not None:
formats.append({'format_id': 'podcast', 'vcodec': 'none', 'url': podcast_url}) formats.append({'format_id': 'podcast', 'vcodec': 'none', 'url': podcast_url})
self._sort_formats(formats) self._sort_formats(formats)