Merge pull request #19 from rg3/master

update
This commit is contained in:
siddht1 2016-11-03 12:03:38 +05:30 committed by GitHub
commit aca072cd3c
3 changed files with 84 additions and 16 deletions

View File

@ -2453,8 +2453,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
elif re.search(r'(?i)\.ism/manifest', video_url):
entry_info_dict['formats'] = self._extract_ism_formats(video_url, video_id)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
# Just matching .ism/manifest is not enough to be reliably sure
# whether it's actually an ISM manifest or some other streaming
# manifest since there are various streaming URL formats
# possible (see [1]) as well as some other shenanigans like
# .smil/manifest URLs that actually serve an ISM (see [2]) and
# so on.
# Thus the most reasonable way to solve this is to delegate
# to generic extractor in order to look into the contents of
# the manifest itself.
# 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
# 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
entry_info_dict = self.url_result(
smuggle_url(video_url, {'to_generic': True}),
GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url

View File

@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor):
f4m_id='hds', fatal=False))
self._sort_formats(formats)
subtitles = {}
closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
if closed_caption_url:
subtitles['fr'] = [{
'url': closed_caption_url,
'ext': determine_ext(closed_caption_url, 'vtt'),
}]
return {
'id': video_id,
'title': get_meta('Title'),
@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
'season_number': int_or_none('SrcSaison'),
'episode_number': int_or_none('SrcEpisode'),
'upload_date': unified_strdate(get_meta('Date')),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -1,17 +1,24 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
str_or_none,
urlencode_postdata,
clean_html,
)
class ShahidIE(InfoExtractor):
_VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
'info_dict': {
@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
}, {
'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
'only_matching': True
}, {
# shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
'only_matching': True
}]
def _call_api(self, path, video_id, note):
# Fetch JSON from the Shahid v1_1 API endpoint formed by appending `path`
# to the base URL, passing `video_id` and `note` through to _download_json.
# A fixed apiKey/hash pair is sent as query parameters on every call.
# Only the 'data' member of the response is kept; an empty dict is used
# when that member is absent.
# NOTE(review): as rendered here the method binds `data` but never returns
# or inspects it, and indentation is missing -- the remainder of the body
# is presumably not shown in this view; confirm against the real file.
data = self._download_json(
'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}).get('data', {})
def _real_initialize(self):
    """Log in to Shahid when credentials are available.

    Does nothing when ``_get_login_info()`` yields no e-mail. Otherwise the
    credentials are sent as a JSON body to the login service and selected
    fields of the returned ``user`` object are then submitted to the
    ``populateContext`` endpoint.

    Raises:
        ExtractorError: with ``expected=True`` and the comma-joined
            ``userMessage`` texts when the login request fails with an
            HTTP error whose body parses as JSON and lists such messages.
            In every other failure case the original ExtractorError is
            re-raised unchanged.
    """
    email, password = self._get_login_info()
    if email is None:
        # No credentials configured: skip the login round-trip entirely.
        return

    login_body = json.dumps({
        'email': email,
        'password': password,
        'basic': 'false',
    }).encode('utf-8')
    login_headers = {
        'Content-Type': 'application/json; charset=UTF-8',
    }

    try:
        login_response = self._download_json(
            'https://shahid.mbc.net/wd/service/users/login',
            None, 'Logging in', data=login_body, headers=login_headers)
        user_data = login_response['user']
    except ExtractorError as e:
        if isinstance(e.cause, compat_HTTPError):
            # The error response body may carry user-facing fault messages.
            fail_data = self._parse_json(
                e.cause.read().decode('utf-8'), None, fatal=False)
            if fail_data:
                messages = []
                for fault in fail_data.get('faults', []):
                    if fault.get('userMessage'):
                        messages.append(clean_html(fault['userMessage']))
                faults_message = ', '.join(messages)
                if faults_message:
                    raise ExtractorError(faults_message, expected=True)
        # Nothing more specific could be extracted: propagate the
        # original failure.
        raise

    context_form = {
        'firstName': user_data['firstName'],
        'lastName': user_data['lastName'],
        'userName': user_data['email'],
        'csg_user_name': user_data['email'],
        'subscriberId': user_data['id'],
        'sessionId': user_data['sessionId'],
    }
    self._download_webpage(
        'https://shahid.mbc.net/populateContext',
        None, 'Populate Context', data=urlencode_postdata(context_form))
def _get_api_data(self, response):
data = response.get('data', {})
error = data.get('error')
if error:
@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
return data
def _real_extract(self, url):
video_id = self._match_id(url)
page_type, video_id = re.match(self._VALID_URL, url).groups()
player = self._call_api(
'Content/Episode/%s' % video_id,
video_id, 'Downloading player JSON')
player = self._get_api_data(self._download_json(
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
video_id, 'Downloading player JSON'))
if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._call_api(
'episode/%s' % video_id, video_id,
'Downloading video JSON')['episode']
video = self._get_api_data(self._download_json(
'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
video_id, 'Downloading video JSON', query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}))[page_type]
title = video['title']
categories = [