Merge remote-tracking branch 'rg3/master'

This commit is contained in:
Muratcan Simsek 2015-12-06 23:04:48 +02:00
commit 10623012a8
37 changed files with 1021 additions and 515 deletions

View File

@ -319,7 +319,7 @@ which means you can modify it, redistribute it or use it however you like.
--all-formats Download all available video formats --all-formats Download all available video formats
--prefer-free-formats Prefer free video formats unless a specific --prefer-free-formats Prefer free video formats unless a specific
one is requested one is requested
-F, --list-formats List all available formats of specified -F, --list-formats List all available formats of requested
videos videos
--youtube-skip-dash-manifest Do not download the DASH manifests and --youtube-skip-dash-manifest Do not download the DASH manifests and
related data on YouTube videos related data on YouTube videos

View File

@ -15,8 +15,12 @@
- **abc.net.au** - **abc.net.au**
- **Abc7News** - **Abc7News**
- **AcademicEarth:Course** - **AcademicEarth:Course**
- **acast**
- **acast:channel**
- **AddAnime** - **AddAnime**
- **AdobeTV** - **AdobeTV**
- **AdobeTVChannel**
- **AdobeTVShow**
- **AdobeTVVideo** - **AdobeTVVideo**
- **AdultSwim** - **AdultSwim**
- **Aftenposten** - **Aftenposten**
@ -43,6 +47,7 @@
- **arte.tv:future** - **arte.tv:future**
- **AtresPlayer** - **AtresPlayer**
- **ATTTechChannel** - **ATTTechChannel**
- **AudiMedia**
- **audiomack** - **audiomack**
- **audiomack:album** - **audiomack:album**
- **Azubu** - **Azubu**
@ -92,6 +97,7 @@
- **Clipfish** - **Clipfish**
- **cliphunter** - **cliphunter**
- **Clipsyndicate** - **Clipsyndicate**
- **cloudtime**: CloudTime
- **Cloudy** - **Cloudy**
- **Clubic** - **Clubic**
- **Clyp** - **Clyp**
@ -183,6 +189,7 @@
- **freespeech.org** - **freespeech.org**
- **FreeVideo** - **FreeVideo**
- **FunnyOrDie** - **FunnyOrDie**
- **GameInformer**
- **Gamekings** - **Gamekings**
- **GameOne** - **GameOne**
- **gameone:playlist** - **gameone:playlist**
@ -307,7 +314,6 @@
- **MovieClips** - **MovieClips**
- **MovieFap** - **MovieFap**
- **Moviezine** - **Moviezine**
- **movshare**: MovShare
- **MPORA** - **MPORA**
- **MSNBC** - **MSNBC**
- **MTV** - **MTV**
@ -480,6 +486,8 @@
- **Shared**: shared.sx and vivo.sx - **Shared**: shared.sx and vivo.sx
- **ShareSix** - **ShareSix**
- **Sina** - **Sina**
- **skynewsarabia:article**
- **skynewsarabia:video**
- **Slideshare** - **Slideshare**
- **Slutload** - **Slutload**
- **smotri**: Smotri.com - **smotri**: Smotri.com
@ -665,6 +673,7 @@
- **WebOfStories** - **WebOfStories**
- **WebOfStoriesPlaylist** - **WebOfStoriesPlaylist**
- **Weibo** - **Weibo**
- **wholecloud**: WholeCloud
- **Wimp** - **Wimp**
- **Wistia** - **Wistia**
- **WNL** - **WNL**

View File

@ -3,9 +3,15 @@ from __future__ import unicode_literals
from .abc import ABCIE from .abc import ABCIE
from .abc7news import Abc7NewsIE from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE from .academicearth import AcademicEarthCourseIE
from .acast import (
ACastIE,
ACastChannelIE,
)
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .adobetv import ( from .adobetv import (
AdobeTVIE, AdobeTVIE,
AdobeTVShowIE,
AdobeTVChannelIE,
AdobeTVVideoIE, AdobeTVVideoIE,
) )
from .adultswim import AdultSwimIE from .adultswim import AdultSwimIE
@ -38,6 +44,7 @@ from .arte import (
) )
from .atresplayer import AtresPlayerIE from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE from .atttechchannel import ATTTechChannelIE
from .audimedia import AudiMediaIE
from .audiomack import AudiomackIE, AudiomackAlbumIE from .audiomack import AudiomackIE, AudiomackAlbumIE
from .azubu import AzubuIE from .azubu import AzubuIE
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
@ -200,6 +207,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE from .freevideo import FreeVideoIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE from .gamekings import GamekingsIE
from .gameone import ( from .gameone import (
GameOneIE, GameOneIE,
@ -349,7 +357,6 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import ( from .mtv import (
MTVIE, MTVIE,
MTVServicesEmbeddedIE, MTVServicesEmbeddedIE,
@ -415,7 +422,13 @@ from .noco import NocoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE from .nosvideo import NosVideoIE
from .nova import NovaIE from .nova import NovaIE
from .novamov import NovaMovIE from .novamov import (
NovaMovIE,
WholeCloudIE,
NowVideoIE,
VideoWeedIE,
CloudTimeIE,
)
from .nowness import ( from .nowness import (
NownessIE, NownessIE,
NownessPlaylistIE, NownessPlaylistIE,
@ -425,7 +438,6 @@ from .nowtv import (
NowTVIE, NowTVIE,
NowTVListIE, NowTVListIE,
) )
from .nowvideo import NowVideoIE
from .npo import ( from .npo import (
NPOIE, NPOIE,
NPOLiveIE, NPOLiveIE,
@ -554,6 +566,10 @@ from .shahid import ShahidIE
from .shared import SharedIE from .shared import SharedIE
from .sharesix import ShareSixIE from .sharesix import ShareSixIE
from .sina import SinaIE from .sina import SinaIE
from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .slutload import SlutloadIE from .slutload import SlutloadIE
from .smotri import ( from .smotri import (
@ -732,7 +748,6 @@ from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .videott import VideoTtIE from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vidme import VidmeIE from .vidme import VidmeIE
from .vidzi import VidziIE from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE from .vier import VierIE, VierVideosIE

View File

@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
class ACastBaseIE(InfoExtractor):
    # Shared parent of the acast extractors. The subclasses build every
    # API request URL by appending an endpoint path to this prefix.
    _API_BASE_URL = 'https://www.acast.com/api/'
class ACastIE(ACastBaseIE):
    # Extractor for a single acast episode URL of the form
    # acast.com/<channel>/<episode>.
    IE_NAME = 'acast'
    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
    _TEST = {
        'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
        'md5': 'ada3de5a1e3a2a381327d749854788bb',
        'info_dict': {
            'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
            'ext': 'mp3',
            'title': '"Where Are You?": Taipei 101, Taiwan',
            # Seconds since the epoch (the API value divided by 1000).
            'timestamp': 1196172000,
            'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
            'duration': 211,
        }
    }

    def _real_extract(self, url):
        """Fetch the playback JSON for one episode and map it to an info dict.

        The channel and episode slugs are taken from the URL; the episode
        slug doubles as the display id used for the download messages.
        """
        channel, display_id = re.match(self._VALID_URL, url).groups()
        cast_data = self._download_json(
            self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id),
            display_id)
        return {
            'id': compat_str(cast_data['id']),
            'display_id': display_id,
            # Only the first entry of 'blings' is used as the media URL.
            'url': cast_data['blings'][0]['audio'],
            'title': cast_data['name'],
            'description': cast_data.get('description'),
            'thumbnail': cast_data.get('image'),
            # publishingDate comes back as a 13-digit millisecond value, while
            # 'timestamp' is meant to hold seconds, hence the scale of 1000.
            'timestamp': int_or_none(cast_data.get('publishingDate'), scale=1000),
            'duration': int_or_none(cast_data.get('duration')),
        }
class ACastChannelIE(ACastBaseIE):
    # Playlist extractor for an acast channel URL of the form
    # acast.com/<channel>.
    IE_NAME = 'acast:channel'
    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
    _TEST = {
        'url': 'https://www.acast.com/condenasttraveler',
        'info_dict': {
            'id': '50544219-29bb-499e-a083-6087f4cb7797',
            'title': 'Condé Nast Traveler Podcast',
            'description': 'md5:98646dee22a5b386626ae31866638fbd',
        },
        'playlist_mincount': 20,
    }

    @classmethod
    def suitable(cls, url):
        # The channel pattern is a prefix of the episode pattern, so any URL
        # the episode extractor accepts is declined here.
        if ACastIE.suitable(url):
            return False
        return super(ACastChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        # Two API calls: channel metadata first, then the channel's episode list.
        display_id = self._match_id(url)
        channel_endpoint = self._API_BASE_URL + 'channels/%s' % display_id
        channel_data = self._download_json(channel_endpoint, display_id)
        casts = self._download_json(
            self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)

        # Each episode is handed back as a URL result for the 'ACast' extractor.
        entries = []
        for cast in casts:
            episode_url = 'https://www.acast.com/%s/%s' % (display_id, cast['url'])
            entries.append(self.url_result(episode_url, 'ACast'))

        playlist_id = compat_str(channel_data['id'])
        return self.playlist_result(
            entries, playlist_id, channel_data['name'],
            channel_data.get('description'))

View File

@ -1,23 +1,32 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
unified_strdate, unified_strdate,
str_to_int, str_to_int,
int_or_none,
float_or_none, float_or_none,
ISO639Utils, ISO639Utils,
determine_ext,
) )
class AdobeTVIE(InfoExtractor): class AdobeTVBaseIE(InfoExtractor):
_VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)' _API_BASE_URL = 'http://tv.adobe.com/api/v4/'
class AdobeTVIE(AdobeTVBaseIE):
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TEST = { _TEST = {
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
'md5': '9bc5727bcdd55251f35ad311ca74fa1e', 'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
'info_dict': { 'info_dict': {
'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop', 'id': '10981',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
@ -29,50 +38,106 @@ class AdobeTVIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id) if not language:
language = 'en'
player = self._parse_json( video_data = self._download_json(
self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'), self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
video_id) urlname)['data'][0]
title = player.get('title') or self._search_regex(
r'data-title="([^"]+)"', webpage, 'title')
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(
self._html_search_meta('datepublished', webpage, 'upload date'))
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration') or
self._search_regex(
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
webpage, 'view count'))
formats = [{ formats = [{
'url': source['src'], 'url': source['url'],
'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None, 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
'tbr': source.get('bitrate'), 'width': int_or_none(source.get('width')),
} for source in player['sources']] 'height': int_or_none(source.get('height')),
'tbr': int_or_none(source.get('video_data_rate')),
} for source in video_data['videos']]
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': compat_str(video_data['id']),
'title': title, 'title': video_data['title'],
'description': description, 'description': video_data.get('description'),
'thumbnail': thumbnail, 'thumbnail': video_data.get('thumbnail'),
'upload_date': upload_date, 'upload_date': unified_strdate(video_data.get('start_date')),
'duration': duration, 'duration': parse_duration(video_data.get('duration')),
'view_count': view_count, 'view_count': str_to_int(video_data.get('playcount')),
'formats': formats, 'formats': formats,
} }
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    # Paging helper shared by the show and channel extractors. Subclasses
    # supply _get_element_url() to pick the URL out of one listing element.

    def _parse_page_data(self, page_data):
        # Turn every element of one result page into a URL result.
        results = []
        for element_data in page_data:
            element_url = self._get_element_url(element_data)
            results.append(self.url_result(element_url))
        return results

    def _extract_playlist_entries(self, url, display_id):
        # The first response carries the total page count under
        # ['paging']['pages']; the remaining pages are requested by
        # appending '&page=N' to the same URL.
        first_page = self._download_json(url, display_id)
        entries = self._parse_page_data(first_page['data'])
        last_page_num = first_page['paging']['pages']
        for page_num in range(2, last_page_num + 1):
            next_page = self._download_json(url + '&page=%d' % page_num, display_id)
            entries.extend(self._parse_page_data(next_page['data']))
        return entries
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    # Playlist extractor for tv.adobe.com show URLs, with an optional
    # language prefix (fr, de, es or jp).
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }

    def _get_element_url(self, element_data):
        # Episode listing elements carry a list of URLs; the first one is used.
        return element_data['urls'][0]

    def _real_extract(self, url):
        """Return a playlist of all episodes of the show named in the URL.

        The show metadata and the episode listing are requested with the
        same language/show query string; the language defaults to 'en' when
        the URL has no language prefix.
        """
        language, show_urlname = re.match(self._VALID_URL, url).groups()
        if not language:
            language = 'en'
        query = 'language=%s&show_urlname=%s' % (language, show_urlname)

        show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]

        return self.playlist_result(
            self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
            compat_str(show_data['id']),
            show_data['show_name'],
            # The description is optional metadata: a show without one should
            # still yield its playlist instead of failing with KeyError.
            show_data.get('show_description'))
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    # Playlist extractor for tv.adobe.com channel URLs, optionally narrowed
    # to a category given as an extra path segment.
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }

    def _get_element_url(self, element_data):
        # Show listing elements expose a single 'url' field.
        return element_data['url']

    def _real_extract(self, url):
        # Groups arrive in pattern order: language, channel, category.
        mobj = re.match(self._VALID_URL, url)
        language, channel_urlname, category_urlname = mobj.groups()
        language = language or 'en'

        query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
        if category_urlname:
            query += '&category_urlname=%s' % category_urlname

        listing_url = self._API_BASE_URL + 'show/?%s' % query
        entries = self._extract_playlist_entries(listing_url, channel_urlname)
        # The channel's URL name doubles as the playlist id.
        return self.playlist_result(entries, channel_urlname)
class AdobeTVVideoIE(InfoExtractor): class AdobeTVVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
@ -91,28 +156,25 @@ class AdobeTVVideoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json(url + '?format=json', video_id)
webpage = self._download_webpage(url, video_id)
player_params = self._parse_json(self._search_regex(
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
video_id)
formats = [{ formats = [{
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
'url': source['src'], 'url': source['src'],
'width': source.get('width'), 'width': int_or_none(source.get('width')),
'height': source.get('height'), 'height': int_or_none(source.get('height')),
'tbr': source.get('bitrate'), 'tbr': int_or_none(source.get('bitrate')),
} for source in player_params['sources']] } for source in video_data['sources']]
self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among # For both metadata and downloaded files the duration varies among
# formats. I just pick the max one # formats. I just pick the max one
duration = max(filter(None, [ duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000) float_or_none(source.get('duration'), scale=1000)
for source in player_params['sources']])) for source in video_data['sources']]))
subtitles = {} subtitles = {}
for translation in player_params.get('translations', []): for translation in video_data.get('translations', []):
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
if lang_id not in subtitles: if lang_id not in subtitles:
subtitles[lang_id] = [] subtitles[lang_id] = []
@ -124,8 +186,9 @@ class AdobeTVVideoIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': player_params['title'], 'title': video_data['title'],
'description': self._og_search_description(webpage), 'description': video_data.get('description'),
'thumbnail': video_data['video'].get('poster'),
'duration': duration, 'duration': duration,
'subtitles': subtitles, 'subtitles': subtitles,
} }

View File

@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
)
class AudiMediaIE(InfoExtractor):
    # Extractor for audimedia.tv video pages (English and German paths).
    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
        'md5': '79a8b71c46d49042609795ab59779b66',
        'info_dict': {
            'id': '1564',
            'ext': 'mp4',
            'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
            'description': 'md5:60e5d30a78ced725f7b8d34370762941',
            'upload_date': '20151124',
            'timestamp': 1448354940,
            'duration': 74022,
            'view_count': int,
        }
    }
    # extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
    _AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'

    def _real_extract(self, url):
        """Resolve the page's embed payload and build the info dict from the API.

        The embed script's id attribute is split on '-' into four fields, of
        which the stage mode, numeric video id and language are used. Nothing
        is returned for stage modes 's' and 'e', which are not handled yet.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
        _, stage_mode, video_id, lang = raw_payload.split('-')

        # TODO: handle s and e stage_mode (live streams and ended live streams)
        if stage_mode in ('s', 'e'):
            return None

        request = sanitized_Request(
            'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
            headers={'X-Auth-Token': self._AUTH_TOKEN})
        json_data = self._download_json(request, video_id)['results']
        formats = []

        stream_url_hls = json_data.get('stream_url_hls')
        if stream_url_hls:
            m3u8_formats = self._extract_m3u8_formats(stream_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
            if m3u8_formats:
                formats.extend(m3u8_formats)

        stream_url_hds = json_data.get('stream_url_hds')
        if stream_url_hds:
            # Reuse the value fetched above rather than looking it up again.
            f4m_formats = self._extract_f4m_formats(stream_url_hds + '?hdcore=3.4.0', video_id, -1, f4m_id='hds', fatal=False)
            if f4m_formats:
                formats.extend(f4m_formats)

        # 'video_versions' may be absent or null; treat that as no versions
        # so the HLS/HDS formats collected above are still returned.
        for video_version in json_data.get('video_versions') or []:
            video_version_url = video_version.get('download_url') or video_version.get('stream_url')
            if not video_version_url:
                continue
            formats.append({
                'url': video_version_url,
                'width': int_or_none(video_version.get('width')),
                'height': int_or_none(video_version.get('height')),
                'abr': int_or_none(video_version.get('audio_bitrate')),
                'vbr': int_or_none(video_version.get('video_bitrate')),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': json_data['title'],
            'description': json_data.get('subtitle'),
            # `or {}` also covers an explicit null, which a .get() default does not.
            'thumbnail': (json_data.get('thumbnail_image') or {}).get('file'),
            'timestamp': parse_iso8601(json_data.get('publication_date')),
            # NOTE(review): the expected test value (74022) for a roughly
            # one-minute clip suggests milliseconds - confirm and scale if so.
            'duration': int_or_none(json_data.get('duration')),
            'view_count': int_or_none(json_data.get('view_count')),
            'formats': formats,
        }

View File

@ -23,7 +23,7 @@ class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pb][\da-z]{7}' _ID_REGEX = r'[pb][\da-z]{7}'
_VALID_URL = r'https?://(?:(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])|)(?P<id>%s)' % _ID_REGEX _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>%s)' % _ID_REGEX
_MEDIASELECTOR_URLS = [ _MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails # Provides HQ HLS streams with even better quality that pc mediaset but fails
@ -47,9 +47,8 @@ class BBCCoUkIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'b039d07m', 'id': 'b039d07m',
'ext': 'flv', 'ext': 'flv',
'title': 'Kaleidoscope, Leonard Cohen', 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
'description': 'The Canadian poet and songwriter reflects on his musical career.', 'description': 'The Canadian poet and songwriter reflects on his musical career.',
'duration': 1740,
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -112,7 +111,8 @@ class BBCCoUkIE(InfoExtractor):
'params': { 'params': {
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
} },
'skip': 'Episode is no longer available on BBC iPlayer Radio',
}, { }, {
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3', 'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
'note': 'Audio', 'note': 'Audio',
@ -454,6 +454,7 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page') webpage = self._download_webpage(url, group_id, 'Downloading video page')
programme_id = None programme_id = None
duration = None
tviplayer = self._search_regex( tviplayer = self._search_regex(
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById', r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
@ -473,7 +474,9 @@ class BBCCoUkIE(InfoExtractor):
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
description = self._search_regex( description = self._search_regex(
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
webpage, 'description', fatal=False) webpage, 'description', default=None)
if not description:
description = self._html_search_meta('description', webpage)
else: else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
@ -587,6 +590,7 @@ class BBCIE(BBCCoUkIE):
'ext': 'mp4', 'ext': 'mp4',
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''', 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
'duration': 56, 'duration': 56,
'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -729,6 +733,7 @@ class BBCIE(BBCCoUkIE):
# article with multiple videos embedded with playlist.sxml (e.g. # article with multiple videos embedded with playlist.sxml (e.g.
# http://www.bbc.com/sport/0/football/34475836) # http://www.bbc.com/sport/0/football/34475836)
playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage) playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
if playlists: if playlists:
entries = [ entries = [
self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp) self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)

View File

@ -1,6 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
@ -29,7 +34,24 @@ class BeegIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
video = self._download_json( video = self._download_json(
'http://beeg.com/api/v1/video/%s' % video_id, video_id) 'http://beeg.com/api/v4/video/%s' % video_id, video_id)
def decrypt_key(key):
    # Reverse engineered from http://static.beeg.com/cpl/1067.js
    # The percent-decoded key is shifted down character by character: each
    # code point is reduced by (code point of the matching salt character,
    # cycling through the salt) modulo 25.
    salt = '8RPUUCS35ZWp3ADnKcSmpH71ZusrROo'
    unquoted = compat_urllib_parse_unquote(key)
    decoded_chars = []
    for index, char in enumerate(unquoted):
        shift = compat_ord(salt[index % len(salt)]) % 25
        decoded_chars.append(compat_chr(compat_ord(char) - shift))
    return ''.join(decoded_chars)
def decrypt_url(encrypted_url):
    # Drop the '{DATA_MARKERS}' placeholder, pass the result through
    # self._proto_relative_url with 'http:', then replace the value found
    # between '/key=' and '%2Cend=' (when present) with its decrypted form.
    without_markers = encrypted_url.replace('{DATA_MARKERS}', '')
    full_url = self._proto_relative_url(without_markers, 'http:')
    key = self._search_regex(
        r'/key=(.*?)%2Cend=', full_url, 'key', default=None)
    if key:
        return full_url.replace(key, decrypt_key(key))
    # No key found: the URL is returned without any key substitution.
    return full_url
formats = [] formats = []
for format_id, video_url in video.items(): for format_id, video_url in video.items():
@ -40,7 +62,7 @@ class BeegIE(InfoExtractor):
if not height: if not height:
continue continue
formats.append({ formats.append({
'url': self._proto_relative_url(video_url.replace('{DATA_MARKERS}', ''), 'http:'), 'url': decrypt_url(video_url),
'format_id': format_id, 'format_id': format_id,
'height': int(height), 'height': int(height),
}) })

View File

@ -2,143 +2,109 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import itertools
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str
compat_etree_fromstring,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
unified_strdate, unescapeHTML,
ExtractorError, ExtractorError,
xpath_text,
) )
class BiliBiliIE(InfoExtractor): class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/', 'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '2c301e4dab317596e837c3e7633e7d86', 'md5': '2c301e4dab317596e837c3e7633e7d86',
'info_dict': { 'info_dict': {
'id': '1074402_part1', 'id': '1554319',
'ext': 'flv', 'ext': 'flv',
'title': '【金坷垃】金泡沫', 'title': '【金坷垃】金泡沫',
'duration': 308, 'duration': 308313,
'upload_date': '20140420', 'upload_date': '20140420',
'thumbnail': 're:^https?://.+\.jpg', 'thumbnail': 're:^https?://.+\.jpg',
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
'timestamp': 1397983878,
'uploader': '菊子桑',
}, },
}, { }, {
'url': 'http://www.bilibili.com/video/av1041170/', 'url': 'http://www.bilibili.com/video/av1041170/',
'info_dict': { 'info_dict': {
'id': '1041170', 'id': '1041170',
'title': '【BD1080P】刀语【诸神&异域】', 'title': '【BD1080P】刀语【诸神&异域】',
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
'uploader': '枫叶逝去',
'timestamp': 1396501299,
}, },
'playlist_count': 9, 'playlist_count': 9,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, video_id) video_id = mobj.group('id')
page_num = mobj.group('page_num') or '1'
if '(此视频不存在或被删除)' in webpage: view_data = self._download_json(
raise ExtractorError( 'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
'The video does not exist or was deleted', expected=True) video_id)
if 'error' in view_data:
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage: cid = view_data['cid']
raise ExtractorError( title = unescapeHTML(view_data['title'])
'The video is not available in your region due to copyright reasons',
expected=True)
video_code = self._search_regex( doc = self._download_xml(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code') 'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
cid,
'Downloading page %s/%s' % (page_num, view_data['pages'])
)
title = self._html_search_meta( if xpath_text(doc, './result') == 'error':
'media:title', video_code, 'title', fatal=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
duration_str = self._html_search_meta(
'duration', video_code, 'duration')
if duration_str is None:
duration = None
else:
duration_mobj = re.match(
r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
duration_str)
duration = (
int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
int(duration_mobj.group('minutes')) * 60 +
int(duration_mobj.group('seconds')))
upload_date = unified_strdate(self._html_search_meta(
'uploadDate', video_code, fatal=False))
thumbnail = self._html_search_meta(
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
entries = [] entries = []
lq_page = self._download_webpage( for durl in doc.findall('./durl'):
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, size = xpath_text(durl, ['./filesize', './size'])
video_id,
note='Downloading LQ video info'
)
try:
err_info = json.loads(lq_page)
raise ExtractorError(
'BiliBili said: ' + err_info['error_text'], expected=True)
except ValueError:
pass
lq_doc = compat_etree_fromstring(lq_page)
lq_durls = lq_doc.findall('./durl')
hq_doc = self._download_xml(
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
video_id,
note='Downloading HQ video info',
fatal=False,
)
if hq_doc is not False:
hq_durls = hq_doc.findall('./durl')
assert len(lq_durls) == len(hq_durls)
else:
hq_durls = itertools.repeat(None)
i = 1
for lq_durl, hq_durl in zip(lq_durls, hq_durls):
formats = [{ formats = [{
'format_id': 'lq', 'url': durl.find('./url').text,
'quality': 1, 'filesize': int_or_none(size),
'url': lq_durl.find('./url').text, 'ext': 'flv',
'filesize': int_or_none(
lq_durl.find('./size'), get_attr='text'),
}] }]
if hq_durl is not None: backup_urls = durl.find('./backup_url')
formats.append({ if backup_urls is not None:
'format_id': 'hq', for backup_url in backup_urls.findall('./url'):
'quality': 2, formats.append({'url': backup_url.text})
'ext': 'flv', formats.reverse()
'url': hq_durl.find('./url').text,
'filesize': int_or_none(
hq_durl.find('./size'), get_attr='text'),
})
self._sort_formats(formats)
entries.append({ entries.append({
'id': '%s_part%d' % (video_id, i), 'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
'title': title, 'title': title,
'duration': int_or_none(xpath_text(durl, './length'), 1000),
'formats': formats, 'formats': formats,
'duration': duration,
'upload_date': upload_date,
'thumbnail': thumbnail,
}) })
i += 1 info = {
'id': compat_str(cid),
return { 'title': title,
'_type': 'multi_video', 'description': view_data.get('description'),
'entries': entries, 'thumbnail': view_data.get('pic'),
'id': video_id, 'uploader': view_data.get('author'),
'title': title 'timestamp': int_or_none(view_data.get('created')),
'view_count': int_or_none(view_data.get('play')),
'duration': int_or_none(xpath_text(doc, './timelength')),
} }
if len(entries) == 1:
entries[0].update(info)
return entries[0]
else:
info.update({
'_type': 'multi_video',
'id': video_id,
'entries': entries,
})
return info

View File

@ -14,9 +14,10 @@ class BYUtvIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'studio-c-season-5-episode-5', 'id': 'studio-c-season-5-episode-5',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:5438d33774b6bdc662f9485a340401cc', 'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'title': 'Season 5 Episode 5', 'title': 'Season 5 Episode 5',
'thumbnail': 're:^https?://.*\.jpg$' 'thumbnail': 're:^https?://.*\.jpg$',
'duration': 1486.486,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -1,14 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
int_or_none, int_or_none,
js_to_json, unified_strdate,
parse_iso8601,
remove_end,
) )
@ -21,48 +16,47 @@ class ClipfishIE(InfoExtractor):
'id': '3966754', 'id': '3966754',
'ext': 'mp4', 'ext': 'mp4',
'title': 'FIFA 14 - E3 2013 Trailer', 'title': 'FIFA 14 - E3 2013 Trailer',
'timestamp': 1370938118, 'description': 'Video zu FIFA 14: E3 2013 Trailer',
'upload_date': '20130611', 'upload_date': '20130611',
'duration': 82, 'duration': 82,
'view_count': int,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_info = self._download_json(
'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id,
video_info = self._parse_json( video_id)['items'][0]
js_to_json(self._html_search_regex(
'(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
video_id)
formats = [] formats = []
for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.append({
'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
'ext': 'mp4',
'format_id': 'hls',
})
else:
formats.append({
'url': video_url,
'format_id': ext,
})
self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - Video') m3u8_url = video_info.get('media_videourl_hls')
thumbnail = self._og_search_thumbnail(webpage) if m3u8_url:
duration = int_or_none(video_info.get('length')) formats.append({
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date')) 'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
'ext': 'mp4',
'format_id': 'hls',
})
mp4_url = video_info.get('media_videourl')
if mp4_url:
formats.append({
'url': mp4_url,
'format_id': 'mp4',
'width': int_or_none(video_info.get('width')),
'height': int_or_none(video_info.get('height')),
'tbr': int_or_none(video_info.get('bitrate')),
})
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': video_info['title'],
'description': video_info.get('descr'),
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
'duration': duration, 'duration': int_or_none(video_info.get('media_length')),
'timestamp': timestamp, 'upload_date': unified_strdate(video_info.get('pubDate')),
'view_count': int_or_none(video_info.get('media_views'))
} }

View File

@ -167,7 +167,7 @@ class InfoExtractor(object):
"ext" will be calculated from URL if missing "ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles', used by the YoutubeIE for automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions automatically generated captions
duration: Length of the video in seconds, as an integer. duration: Length of the video in seconds, as an integer or float.
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video dislike_count: Number of negative ratings of the video

View File

@ -37,8 +37,8 @@ class FC2IE(InfoExtractor):
'params': { 'params': {
'username': 'ytdl@yt-dl.org', 'username': 'ytdl@yt-dl.org',
'password': '(snip)', 'password': '(snip)',
'skip': 'requires actual password' },
} 'skip': 'requires actual password',
}, { }, {
'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF', 'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
'only_matching': True, 'only_matching': True,

View File

@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
class GameInformerIE(InfoExtractor):
    # Extractor for gameinformer.com article pages (URLs ending in .aspx).
    # The page references a Brightcove API URL, which is queried for the
    # video metadata and the stream URL.
    _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
    _TEST = {
        'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
        'info_dict': {
            'id': '4515472681001',
            'ext': 'm3u8',
            'title': 'Replay - Animal Crossing',
            'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
            # Seconds since the epoch (the API value 1443457610706 is in
            # milliseconds and is scaled down during extraction).
            'timestamp': 1443457610,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        # The part of the URL path before '.aspx' is only a display id; the
        # real video id comes from the API response.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The API URL is passed to a getVideo('...') call in the page source.
        bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
        json_data = self._download_json(
            bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
            display_id)

        return {
            'id': compat_str(json_data['id']),
            'display_id': display_id,
            # Only the first iOS rendition is used.
            'url': json_data['IOSRenditions'][0]['url'],
            'title': json_data['name'],
            'description': json_data.get('shortDescription'),
            # publishedDate and length are reported in milliseconds; the
            # info dict expects seconds, hence the scale of 1000.
            'timestamp': int_or_none(json_data.get('publishedDate'), 1000),
            'duration': int_or_none(json_data.get('length'), 1000),
        }

View File

@ -1,19 +1,62 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_age_limit,
url_basename,
)
class GametrailersIE(MTVServicesInfoExtractor): class GametrailersIE(InfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
_TEST = { _TEST = {
'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', 'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7', 'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
'info_dict': { 'info_dict': {
'id': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d', 'id': '2983958',
'ext': 'mp4', 'ext': 'mp4',
'title': 'E3 2013: Debut Trailer', 'display_id': '116437-Just-Cause-3-Review',
'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', 'title': 'Just Cause 3 - Review',
'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
}, },
} }
def _real_extract(self, url):
    # Extract a gametrailers.com video: read the title and description from
    # the landing page, then follow the embed page for the media list.
    display_id = self._match_id(url)

    webpage = self._download_webpage(url, display_id)
    title = self._html_search_regex(
        r'<title>(.+?)\|', webpage, 'title').strip()

    # The embed URL is protocol-relative in the page; force plain http.
    embed_url = self._proto_relative_url(
        self._search_regex(
            r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
            'embed url'),
        scheme='http:')
    # The last path component of the embed URL serves as the video id.
    video_id = url_basename(embed_url)

    embed_page = self._download_webpage(embed_url, video_id)
    embed_vars_json = self._search_regex(
        r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
        'embed vars')
    info = self._parse_json(embed_vars_json, video_id)

    formats = []
    for media in info['media']:
        # Only entries whose purpose is 'play' are used as formats.
        if media['mediaPurpose'] == 'play':
            formats.append({
                'url': media['uri'],
                'height': media['height'],
                # The key must be exactly 'width'; it previously carried a
                # stray colon ('width:'), so the value was stored under a
                # key nothing reads.
                'width': media['width'],
            })
    self._sort_formats(formats)

    return {
        'id': video_id,
        'display_id': display_id,
        'title': title,
        'formats': formats,
        'thumbnail': info.get('thumbUri'),
        'description': self._og_search_description(webpage),
        'duration': int_or_none(info.get('videoLengthInSeconds')),
        'age_limit': parse_age_limit(info.get('audienceRating')),
    }

View File

@ -339,6 +339,7 @@ class GenericIE(InfoExtractor):
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4', 'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get 'title': '2cc213299525360.mov', # that's what we get
'duration': 238.231,
}, },
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
}, },
@ -350,6 +351,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer', 'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
'duration': 135.427,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -960,8 +962,9 @@ class GenericIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs', 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4', 'ext': 'mp4',
'description': 'VIDEO: Index/Match versus VLOOKUP.', 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes', 'title': 'This is what separates the Excel masters from the wannabes',
'duration': 191.933,
}, },
'params': { 'params': {
# m3u8 downloads # m3u8 downloads
@ -1501,7 +1504,7 @@ class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage)) re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None: if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec')) return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
# Look for multiple Ooyala embeds on SBN network websites # Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@ -1509,7 +1512,7 @@ class GenericIE(InfoExtractor):
embeds = self._parse_json(mobj.group(1), video_id, fatal=False) embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds: if embeds:
return _playlist_from_matches( return _playlist_from_matches(
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos # Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)

View File

@ -18,6 +18,8 @@ class GrouponIE(InfoExtractor):
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf', 'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bikram Yoga Huntington Beach | Orange County', 'title': 'Bikram Yoga Huntington Beach | Orange County',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'duration': 44.961,
}, },
}], }],
'params': { 'params': {

View File

@ -16,6 +16,7 @@ class HowcastIE(InfoExtractor):
'description': 'md5:dbe792e5f6f1489027027bf2eba188a3', 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
'timestamp': 1276081287, 'timestamp': 1276081287,
'upload_date': '20100609', 'upload_date': '20100609',
'duration': 56.823,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download

View File

@ -28,15 +28,12 @@ class HypemIE(InfoExtractor):
track_id = self._match_id(url) track_id = self._match_id(url)
data = {'ax': 1, 'ts': time.time()} data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data) request = sanitized_Request(url + '?' + compat_urllib_parse.urlencode(data))
complete_url = url + "?" + data_encoded
request = sanitized_Request(complete_url)
response, urlh = self._download_webpage_handle( response, urlh = self._download_webpage_handle(
request, track_id, 'Downloading webpage with the url') request, track_id, 'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '')
html_tracks = self._html_search_regex( html_tracks = self._html_search_regex(
r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>', r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',
response, 'tracks') response, 'tracks')
try: try:
track_list = json.loads(html_tracks) track_list = json.loads(html_tracks)
@ -46,15 +43,14 @@ class HypemIE(InfoExtractor):
key = track['key'] key = track['key']
track_id = track['id'] track_id = track['id']
artist = track['artist']
title = track['song'] title = track['song']
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
request = sanitized_Request( request = sanitized_Request(
serve_url, '', {'Content-Type': 'application/json'}) 'http://hypem.com/serve/source/%s/%s' % (track_id, key),
request.add_header('cookie', cookie) '', {'Content-Type': 'application/json'})
song_data = self._download_json(request, track_id, 'Downloading metadata') song_data = self._download_json(request, track_id, 'Downloading metadata')
final_url = song_data["url"] final_url = song_data['url']
artist = track.get('artist')
return { return {
'id': track_id, 'id': track_id,

View File

@ -205,9 +205,8 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, swf_url, video_id): def get_enc_key(self, swf_url, video_id):
# TODO: automatic key extraction # TODO: automatic key extraction
# last update at 2015-10-22 for Zombie::bite # last update at 2015-12-06 for Zombie::bite
# '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2] enc_key = '3719f6a1da83ee0aee3488d8802d7696'[::-1]
enc_key = '2c76de15dcb44bd28ff0927d50d31620'
return enc_key return enc_key
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,23 +1,25 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse from ..utils import (
from ..utils import sanitized_Request sanitized_Request,
url_basename,
)
class KeezMoviesIE(InfoExtractor): class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)' _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = { _TEST = {
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
'md5': '6e297b7e789329923fcf83abb67c9289', 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
'info_dict': { 'info_dict': {
'id': '1214711', 'id': '1214711',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub', 'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18, 'age_limit': 18,
'thumbnail': 're:^https?://.*\.jpg$',
} }
} }
@ -36,21 +38,29 @@ class KeezMoviesIE(InfoExtractor):
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<h1 [^>]*>([^<]+)', webpage, 'title') r'<h1 [^>]*>([^<]+)', webpage, 'title')
video_url = self._html_search_regex( flashvars = self._parse_json(self._search_regex(
r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL') r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id)
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:] formats = []
format = path.split('/')[4].split('_')[:2] for height in (180, 240, 480):
format = "-".join(format) if flashvars.get('quality_%dp' % height):
video_url = flashvars['quality_%dp' % height]
a_format = {
'url': video_url,
'height': height,
'format_id': '%dp' % height,
}
filename_parts = url_basename(video_url).split('_')
if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]):
a_format['tbr'] = int(filename_parts[1][:-1])
formats.append(a_format)
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'url': video_url, 'formats': formats,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': age_limit, 'age_limit': age_limit,
'thumbnail': flashvars.get('image_url')
} }

View File

@ -154,10 +154,10 @@ class MetacafeIE(InfoExtractor):
# Extract URL, uploader and title from webpage # Extract URL, uploader and title from webpage
self.report_extraction(video_id) self.report_extraction(video_id)
video_url = None video_url = None
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) mobj = re.search(r'(?m)&(?:media|video)URL=([^&]+)', webpage)
if mobj is not None: if mobj is not None:
mediaURL = compat_urllib_parse_unquote(mobj.group(1)) mediaURL = compat_urllib_parse_unquote(mobj.group(1))
video_ext = mediaURL[-3:] video_ext = determine_ext(mediaURL)
# Extract gdaKey if available # Extract gdaKey if available
mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
@ -229,7 +229,7 @@ class MetacafeIE(InfoExtractor):
age_limit = ( age_limit = (
18 18
if re.search(r'"contentRating":"restricted"', webpage) if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
else 0) else 0)
if isinstance(video_url, list): if isinstance(video_url, list):

View File

@ -64,7 +64,8 @@ class MixcloudIE(InfoExtractor):
preview_url = self._search_regex( preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/') song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
song_url = song_url.replace('/previews/', '/c/originals/')
if not self._check_url(song_url, track_id, 'mp3'): if not self._check_url(song_url, track_id, 'mp3'):
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
if not self._check_url(song_url, track_id, 'm4a'): if not self._check_url(song_url, track_id, 'm4a'):

View File

@ -1,27 +0,0 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class MovShareIE(NovaMovIE):
    # Extractor for movshare.(net|sx|ag). Only class attributes are
    # overridden here; everything else is inherited from NovaMovIE.
    IE_NAME = 'movshare'
    IE_DESC = 'MovShare'

    # Raw string: '\.' is meant for the regex engine and must not depend on
    # Python passing an unknown string escape through unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'movshare\.(?:net|sx|ag)'}

    _HOST = 'www.movshare.net'

    # Page patterns specific to this site's markup.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'

    _TEST = {
        'url': 'http://www.movshare.net/video/559e28be54d96',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': '559e28be54d96',
            'ext': 'flv',
            'title': 'dissapeared image',
            'description': 'optical illusion dissapeared image magic illusion',
        }
    }

View File

@ -1,63 +1,102 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
remove_end,
parse_duration, parse_duration,
int_or_none,
xpath_text,
xpath_attr,
) )
class NBAIE(InfoExtractor): class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$' _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4', 'md5': '9e7729d3010a9c71506fd1248f74e4f4',
'info_dict': { 'info_dict': {
'id': '0021200253-okc-bkn-recap.nba', 'id': '0021200253-okc-bkn-recap',
'ext': 'mp4', 'ext': 'flv',
'title': 'Thunder vs. Nets', 'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181, 'duration': 181,
'timestamp': 1354638466,
'upload_date': '20121204',
}, },
}, { }, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
'info_dict': { 'info_dict': {
'id': '0041400301-cle-atl-recap.nba', 'id': '0041400301-cle-atl-recap',
'ext': 'mp4', 'ext': 'mp4',
'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1', 'title': 'Hawks vs. Cavaliers Game 1',
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d', 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
'duration': 228, 'duration': 228,
}, 'timestamp': 1432134543,
'params': { 'upload_date': '20150520',
'skip_download': True,
} }
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) path, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id) if path.startswith('nba/'):
path = path[3:]
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
video_id = xpath_text(video_info, 'slug')
title = xpath_text(video_info, 'headline')
description = xpath_text(video_info, 'description')
duration = parse_duration(xpath_text(video_info, 'length'))
timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4' thumbnails = []
for image in video_info.find('images'):
thumbnails.append({
'id': image.attrib.get('cut'),
'url': image.text,
'width': int_or_none(image.attrib.get('width')),
'height': int_or_none(image.attrib.get('height')),
})
shortened_video_id = video_id.rpartition('/')[2] formats = []
title = remove_end( for video_file in video_info.findall('.//file'):
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com') video_url = video_file.text
if video_url.startswith('/'):
description = self._og_search_description(webpage) continue
duration_str = self._html_search_meta( if video_url.endswith('.m3u8'):
'duration', webpage, 'duration', default=None) m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)
if not duration_str: if m3u8_formats:
duration_str = self._html_search_regex( formats.extend(m3u8_formats)
r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False) elif video_url.endswith('.f4m'):
duration = parse_duration(duration_str) f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
else:
key = video_file.attrib.get('bitrate')
format_info = {
'format_id': key,
'url': video_url,
}
mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
if mobj:
format_info.update({
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
'tbr': int_or_none(mobj.group(3)),
})
formats.append(format_info)
self._sort_formats(formats)
return { return {
'id': shortened_video_id, 'id': video_id,
'url': video_url,
'title': title, 'title': title,
'description': description, 'description': description,
'duration': duration, 'duration': duration,
'timestamp': timestamp,
'thumbnails': thumbnails,
'formats': formats,
} }

View File

@ -92,3 +92,89 @@ class NovaMovIE(InfoExtractor):
'title': title, 'title': title,
'description': description 'description': description
} }
class WholeCloudIE(NovaMovIE):
    # Extractor for wholecloud.net; the URL pattern also accepts the
    # movshare.(net|sx|ag) hosts. Only class attributes are overridden here;
    # everything else is inherited from NovaMovIE.
    IE_NAME = 'wholecloud'
    IE_DESC = 'WholeCloud'

    # Raw string: '\.' is meant for the regex engine and must not depend on
    # Python passing an unknown string escape through unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}

    _HOST = 'www.wholecloud.net'

    # Page patterns specific to this site's markup.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'

    _TEST = {
        'url': 'http://www.wholecloud.net/video/559e28be54d96',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': '559e28be54d96',
            'ext': 'flv',
            'title': 'dissapeared image',
            'description': 'optical illusion dissapeared image magic illusion',
        }
    }
class NowVideoIE(NovaMovIE):
    # Extractor for the nowvideo.* hosts listed in _VALID_URL. Only class
    # attributes are overridden here; everything else is inherited from
    # NovaMovIE.
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'

    # Raw string: '\.' is meant for the regex engine and must not depend on
    # Python passing an unknown string escape through unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}

    _HOST = 'www.nowvideo.to'

    # Page patterns specific to this site's markup.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
    _TITLE_REGEX = r'<h4>([^<]+)</h4>'
    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'

    _TEST = {
        'url': 'http://www.nowvideo.to/video/0mw0yow7b6dxa',
        'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
        'info_dict': {
            'id': '0mw0yow7b6dxa',
            'ext': 'flv',
            'title': 'youtubedl test video _BaW_jenozKc.mp4',
            'description': 'Description',
        }
    }
class VideoWeedIE(NovaMovIE):
    # Extractor for videoweed.(es|com). Only class attributes are overridden
    # here; everything else is inherited from NovaMovIE.
    IE_NAME = 'videoweed'
    IE_DESC = 'VideoWeed'

    # Raw string: '\.' is meant for the regex engine and must not depend on
    # Python passing an unknown string escape through unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}

    _HOST = 'www.videoweed.es'

    # Page patterns specific to this site's markup. No description pattern
    # is overridden here, and the test below expects an empty description.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'

    _TEST = {
        'url': 'http://www.videoweed.es/file/b42178afbea14',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': 'b42178afbea14',
            'ext': 'flv',
            'title': 'optical illusion dissapeared image magic illusion',
            'description': ''
        },
    }
class CloudTimeIE(NovaMovIE):
    # Extractor for cloudtime.to. Only class attributes are overridden here;
    # everything else is inherited from NovaMovIE.
    IE_NAME = 'cloudtime'
    IE_DESC = 'CloudTime'

    # Raw string: '\.' is meant for the regex engine and must not depend on
    # Python passing an unknown string escape through unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}

    _HOST = 'www.cloudtime.to'

    # Page patterns specific to this site's markup.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'

    # No test case is defined for this site.
    _TEST = None

View File

@ -71,7 +71,7 @@ class NowTVBaseIE(InfoExtractor):
class NowTVIE(NowTVBaseIE): class NowTVIE(NowTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)' _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{ _TESTS = [{
# rtl # rtl
@ -190,6 +190,9 @@ class NowTVIE(NowTVBaseIE):
}, { }, {
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player', 'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,28 +0,0 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class NowVideoIE(NovaMovIE):
    # Extractor for the nowvideo.* hosts listed in _VALID_URL. Only class
    # attributes are overridden here; everything else is inherited from
    # NovaMovIE.
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'

    # Raw string: '\.' is meant for the regex engine and must not depend on
    # Python passing an unknown string escape through unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}

    _HOST = 'www.nowvideo.to'

    # Page patterns specific to this site's markup.
    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
    _TITLE_REGEX = r'<h4>([^<]+)</h4>'
    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'

    _TEST = {
        'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
        'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
        'info_dict': {
            'id': '0mw0yow7b6dxa',
            'ext': 'flv',
            'title': 'youtubedl test video _BaW_jenozKc.mp4',
            'description': 'Description',
        }
    }

View File

@ -1,108 +1,69 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
import base64 import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unescapeHTML,
ExtractorError,
determine_ext,
int_or_none, int_or_none,
float_or_none,
ExtractorError,
unsmuggle_url,
) )
from ..compat import compat_urllib_parse
class OoyalaBaseIE(InfoExtractor): class OoyalaBaseIE(InfoExtractor):
def _extract_result(self, info, more_info): def _extract(self, content_tree_url, video_id, domain='example.org'):
embedCode = info['embedCode'] content_tree = self._download_json(content_tree_url, video_id)['content_tree']
video_url = info.get('ipad_url') or info['url'] metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
if determine_ext(video_url) == 'm3u8': pcode = metadata.get('asset_pcode') or embed_code
formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') video_info = {
else: 'id': embed_code,
formats = [{ 'title': metadata['title'],
'url': video_url, 'description': metadata.get('description'),
'ext': 'mp4', 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
}] 'duration': float_or_none(metadata.get('duration'), 1000),
return {
'id': embedCode,
'title': unescapeHTML(info['title']),
'formats': formats,
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
} }
def _extract(self, player_url, video_id): formats = []
player = self._download_webpage(player_url, video_id) for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, 'mobile player url')
# Looks like some videos are only available for particular devices
# (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
# is only available for ipad)
# Working around with fetching URLs for all the devices found starting with 'unknown'
# until we succeed or eventually fail for each device.
devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
devices.remove('unknown')
devices.insert(0, 'unknown')
for device in devices:
mobile_player = self._download_webpage(
'%s&device=%s' % (mobile_url, device), video_id,
'Downloading mobile player JS for %s device' % device)
videos_info = self._search_regex(
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
mobile_player, 'info', fatal=False, default=None)
if videos_info:
break
if not videos_info:
formats = []
auth_data = self._download_json( auth_data = self._download_json(
'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}),
video_id) video_id, 'Downloading %s JSON' % supported_format)
cur_auth_data = auth_data['authorization_data'][video_id] cur_auth_data = auth_data['authorization_data'][embed_code]
for stream in cur_auth_data['streams']: if cur_auth_data['authorized']:
formats.append({ for stream in cur_auth_data['streams']:
'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
'ext': stream.get('delivery_type'), delivery_type = stream['delivery_type']
'format': stream.get('video_codec'), if delivery_type == 'remote_asset':
'format_id': stream.get('profile'), video_info['url'] = url
'width': int_or_none(stream.get('width')), return video_info
'height': int_or_none(stream.get('height')), if delivery_type == 'hls':
'abr': int_or_none(stream.get('audio_bitrate')), formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
'vbr': int_or_none(stream.get('video_bitrate')), elif delivery_type == 'hds':
}) formats.extend(self._extract_f4m_formats(url, embed_code, -1, 'hds', fatal=False))
if formats: else:
return { formats.append({
'id': video_id, 'url': url,
'formats': formats, 'ext': stream.get('delivery_type'),
'title': 'Ooyala video', 'vcodec': stream.get('video_codec'),
} 'format_id': '%s-%s-%sp' % (stream.get('profile'), delivery_type, stream.get('height')),
'width': int_or_none(stream.get('width')),
'height': int_or_none(stream.get('height')),
'abr': int_or_none(stream.get('audio_bitrate')),
'vbr': int_or_none(stream.get('video_bitrate')),
'fps': float_or_none(stream.get('framerate')),
})
else:
raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True)
self._sort_formats(formats)
if not cur_auth_data['authorized']: video_info['formats'] = formats
raise ExtractorError(cur_auth_data['message'], expected=True) return video_info
if not videos_info:
raise ExtractorError('Unable to extract info')
videos_info = videos_info.replace('\\"', '"')
videos_more_info = self._search_regex(
r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
videos_info = json.loads(videos_info)
videos_more_info = json.loads(videos_more_info)
if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
return {
'_type': 'playlist',
'id': video_id,
'title': unescapeHTML(videos_more_info['title']),
'entries': videos,
}
else:
return self._extract_result(videos_info[0], videos_more_info)
class OoyalaIE(OoyalaBaseIE): class OoyalaIE(OoyalaBaseIE):
@ -117,6 +78,7 @@ class OoyalaIE(OoyalaBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Explaining Data Recovery from Hard Drives and SSDs', 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
'duration': 853.386,
}, },
}, { }, {
# Only available for ipad # Only available for ipad
@ -125,7 +87,7 @@ class OoyalaIE(OoyalaBaseIE):
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Simulation Overview - Levels of Simulation', 'title': 'Simulation Overview - Levels of Simulation',
'description': '', 'duration': 194.948,
}, },
}, },
{ {
@ -136,7 +98,8 @@ class OoyalaIE(OoyalaBaseIE):
'info_dict': { 'info_dict': {
'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx', 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ooyala video', 'title': 'Divide Tool Path.mp4',
'duration': 204.405,
} }
} }
] ]
@ -151,9 +114,11 @@ class OoyalaIE(OoyalaBaseIE):
ie=cls.ie_key()) ie=cls.ie_key())
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url) embed_code = self._match_id(url)
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code domain = smuggled_data.get('domain')
return self._extract(player_url, embed_code) content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code)
return self._extract(content_tree_url, embed_code, domain)
class OoyalaExternalIE(OoyalaBaseIE): class OoyalaExternalIE(OoyalaBaseIE):
@ -170,7 +135,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
.*?&pcode= .*?&pcode=
) )
(?P<pcode>.+?) (?P<pcode>.+?)
(&|$) (?:&|$)
''' '''
_TEST = { _TEST = {
@ -179,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4', 'ext': 'mp4',
'title': 'dm_140128_30for30Shorts___JudgingJewellv2', 'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
'description': '', 'duration': 1302000,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -188,9 +153,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
partner_id = mobj.group('partner_id') content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id)
video_id = mobj.group('id') return self._extract(content_tree_url, video_id)
pcode = mobj.group('pcode')
player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode)
return self._extract(player_url, video_id)

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json import json
import random import random
import collections import collections
@ -14,6 +15,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
qualities,
sanitized_Request, sanitized_Request,
) )
@ -140,15 +142,28 @@ class PluralsightIE(PluralsightBaseIE):
'low': {'width': 640, 'height': 480}, 'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640}, 'medium': {'width': 848, 'height': 640},
'high': {'width': 1024, 'height': 768}, 'high': {'width': 1024, 'height': 768},
'high-widescreen': {'width': 1280, 'height': 720},
} }
QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
quality_key = qualities(QUALITIES_PREFERENCE)
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities']) AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = ( ALLOWED_QUALITIES = (
AllowedQuality('webm', ('high',)), AllowedQuality('webm', ['high', ]),
AllowedQuality('mp4', ('low', 'medium', 'high',)), AllowedQuality('mp4', ['low', 'medium', 'high', ]),
) )
# Some courses also offer widescreen resolution for high quality (see
# https://github.com/rg3/youtube-dl/issues/7766)
widescreen = True if re.search(
r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage) else False
best_quality = 'high-widescreen' if widescreen else 'high'
if widescreen:
for allowed_quality in ALLOWED_QUALITIES:
allowed_quality.qualities.append(best_quality)
# In order to minimize the number of calls to ViewClip API and reduce # In order to minimize the number of calls to ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight we will request # the probability of being throttled or banned by Pluralsight we will request
# only single format until formats listing was explicitly requested. # only single format until formats listing was explicitly requested.
@ -157,19 +172,19 @@ class PluralsightIE(PluralsightBaseIE):
else: else:
def guess_allowed_qualities(): def guess_allowed_qualities():
req_format = self._downloader.params.get('format') or 'best' req_format = self._downloader.params.get('format') or 'best'
req_format_split = req_format.split('-') req_format_split = req_format.split('-', 1)
if len(req_format_split) > 1: if len(req_format_split) > 1:
req_ext, req_quality = req_format_split req_ext, req_quality = req_format_split
for allowed_quality in ALLOWED_QUALITIES: for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), ) return (AllowedQuality(req_ext, (req_quality, )), )
req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4' req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
return (AllowedQuality(req_ext, ('high', )), ) return (AllowedQuality(req_ext, (best_quality, )), )
allowed_qualities = guess_allowed_qualities() allowed_qualities = guess_allowed_qualities()
formats = [] formats = []
for ext, qualities in allowed_qualities: for ext, qualities_ in allowed_qualities:
for quality in qualities: for quality in qualities_:
f = QUALITIES[quality].copy() f = QUALITIES[quality].copy()
clip_post = { clip_post = {
'a': author, 'a': author,
@ -205,6 +220,7 @@ class PluralsightIE(PluralsightBaseIE):
'url': clip_url, 'url': clip_url,
'ext': ext, 'ext': ext,
'format_id': format_id, 'format_id': format_id,
'quality': quality_key(quality),
}) })
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -0,0 +1,117 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
parse_iso8601,
parse_duration,
)
class SkyNewsArabiaBaseIE(InfoExtractor):
    """Shared helpers for the Sky News Arabia extractors.

    Provides access to the site's JSON REST endpoint and converts a video
    record into a ``url_transparent`` result that is handed over to the
    ``LimelightMedia`` extractor.
    """

    _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images'

    def _call_api(self, path, value):
        """Download and return the JSON document for ``path``/``value``."""
        api_url = 'http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value)
        return self._download_json(api_url, value)

    def _get_limelight_media_id(self, url):
        """Return the 32-character id that follows ``/media/<name>/`` in *url*."""
        return self._search_regex(
            r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id')

    def _get_image_url(self, image_path_template, width='1600', height='1200'):
        """Format the path template with *width*/*height* and prepend the image base URL."""
        image_path = image_path_template.format(width=width, height=height)
        return self._IMAGE_BASE_URL + image_path

    def _extract_video_info(self, video_data):
        """Build a ``url_transparent`` info dict from one video record."""
        video_id = compat_str(video_data['id'])
        topic = video_data.get('topicTitle')
        # Only the first entry of ``videoUrl`` is consulted for the media id.
        media_id = self._get_limelight_media_id(video_data['videoUrl'][0]['url'])
        info = {
            '_type': 'url_transparent',
            'url': 'limelight:media:%s' % media_id,
            'id': video_id,
            'title': video_data['headline'],
            'description': video_data.get('summary'),
            'thumbnail': self._get_image_url(video_data['mediaAsset']['imageUrl']),
            'timestamp': parse_iso8601(video_data.get('date')),
            'duration': parse_duration(video_data.get('runTime')),
            'tags': video_data.get('tags', []),
            'categories': [topic] if topic else [],
            'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id,
            'ie_key': 'LimelightMedia',
        }
        return info
class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
    """Extractor for single ``/web/video/<id>`` pages."""

    IE_NAME = 'skynewsarabia:video'
    _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.skynewsarabia.com/web/video/794902/%D9%86%D8%B5%D9%81-%D9%85%D9%84%D9%8A%D9%88%D9%86-%D9%85%D8%B5%D8%A8%D8%A7%D8%AD-%D8%B4%D8%AC%D8%B1%D8%A9-%D9%83%D8%B1%D9%8A%D8%B3%D9%85%D8%A7%D8%B3',
        'info_dict': {
            'id': '794902',
            'ext': 'flv',
            'title': 'نصف مليون مصباح على شجرة كريسماس',
            'description': 'md5:22f1b27f0850eeb10c7e59b1f16eb7c6',
            'upload_date': '20151128',
            'timestamp': 1448697198,
            'duration': 2119,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the video record for the id in *url* and convert it to an info dict."""
        return self._extract_video_info(
            self._call_api('video', self._match_id(url)))
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
    """Extractor for ``/web/article/<id>`` pages.

    An article whose main media asset is a video yields a single
    ``url_transparent`` result; otherwise the article's inline video items
    are returned as a playlist.
    """

    # Must differ from SkyNewsArabiaIE.IE_NAME so the two extractors can be
    # told apart in listings and messages.
    IE_NAME = 'skynewsarabia:article'
    _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',
        'info_dict': {
            'id': '794549',
            'ext': 'flv',
            'title': 'بالفيديو.. ألعاب ذكية تحاكي واقع المنطقة',
            'description': 'md5:0c373d29919a851e080ee4edd0c5d97f',
            'upload_date': '20151126',
            'timestamp': 1448559336,
            'duration': 281.6,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.skynewsarabia.com/web/article/794844/%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D9%82%D9%88%D8%A7%D8%B1%D8%A8-%D8%A7%D9%94%D8%B3%D9%84%D8%AD%D8%A9-%D9%84%D9%85%D9%8A%D9%84%D9%8A%D8%B4%D9%8A%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D9%88%D8%AB%D9%8A-%D9%88%D8%B5%D8%A7%D9%84%D8%AD',
        'info_dict': {
            'id': '794844',
            'title': 'إحباط تهريب أسلحة لميليشيات الحوثي وصالح بجنوب اليمن',
            'description': 'md5:5c927b8b2e805796e7f693538d96fc7e',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return a single video result or a playlist of the article's inline videos."""
        article_id = self._match_id(url)
        article_data = self._call_api('article', article_id)
        media_asset = article_data['mediaAsset']
        if media_asset['type'] == 'VIDEO':
            topic = article_data.get('topicTitle')
            return {
                '_type': 'url_transparent',
                'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']),
                'id': article_id,
                'title': article_data['headline'],
                'description': article_data.get('summary'),
                'thumbnail': self._get_image_url(media_asset['imageUrl']),
                'timestamp': parse_iso8601(article_data.get('date')),
                'tags': article_data.get('tags', []),
                'categories': [topic] if topic else [],
                'webpage_url': url,
                'ie_key': 'LimelightMedia',
            }
        # Tolerate a missing/null item list and items that carry no 'type'
        # key instead of failing the whole article on one odd entry.
        entries = [
            self._extract_video_info(item)
            for item in (article_data.get('inlineItems') or [])
            if item.get('type') == 'VIDEO']
        return self.playlist_result(
            entries, article_id, article_data['headline'], article_data.get('summary'))

View File

@ -11,7 +11,7 @@ from ..utils import (
class SrfIE(InfoExtractor): class SrfIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})' _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/(?:tv|radio)/[^/]+/(?P<media_type>video|audio)/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
_TESTS = [{ _TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'md5': '4cd93523723beff51bb4bee974ee238d', 'md5': '4cd93523723beff51bb4bee974ee238d',
@ -35,6 +35,20 @@ class SrfIE(InfoExtractor):
'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive', 'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
'timestamp': 1373493600, 'timestamp': 1373493600,
}, },
}, {
'url': 'http://www.srf.ch/play/radio/hoerspielarchiv-srf-musikwelle/audio/saegel-ohni-wind-von-jakob-stebler?id=415bf3d3-6429-4de7-968d-95866e37cfbc',
'md5': '',
'info_dict': {
'id': '415bf3d3-6429-4de7-968d-95866e37cfbc',
'display_id': 'saegel-ohni-wind-von-jakob-stebler',
'ext': 'mp3',
'upload_date': '20080518',
'title': '«Sägel ohni Wind» von Jakob Stebler',
'timestamp': 1211112000,
},
'params': {
'skip_download': True, # requires rtmpdump
},
}, { }, {
'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'only_matching': True, 'only_matching': True,
@ -44,11 +58,13 @@ class SrfIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
display_id = re.match(self._VALID_URL, url).group('display_id') or video_id video_id = mobj.group('id')
media_type = mobj.group('media_type')
display_id = mobj.group('display_id') or video_id
video_data = self._download_xml( video_data = self._download_xml(
'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id, 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/%s/play/%s.xml' % (media_type, video_id),
display_id) display_id)
title = xpath_text( title = xpath_text(
@ -64,7 +80,7 @@ class SrfIE(InfoExtractor):
for url_node in item.findall('url'): for url_node in item.findall('url'):
quality = url_node.attrib['quality'] quality = url_node.attrib['quality']
full_url = url_node.text full_url = url_node.text
original_ext = determine_ext(full_url) original_ext = determine_ext(full_url).lower()
format_id = '%s-%s' % (quality, item.attrib['protocol']) format_id = '%s-%s' % (quality, item.attrib['protocol'])
if original_ext == 'f4m': if original_ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(

View File

@ -16,6 +16,7 @@ class TeachingChannelIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'A History of Teaming', 'title': 'A History of Teaming',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
'duration': 422.255,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download

View File

@ -1,80 +1,103 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
)
class TriluliluIE(InfoExtractor): class TriluliluIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)'
_TEST = { _TESTS = [{
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1', 'url': 'http://www.trilulilu.ro/big-buck-bunny-1',
'md5': 'c1450a00da251e2769b74b9005601cac', 'md5': '68da087b676a6196a413549212f60cc6',
'info_dict': { 'info_dict': {
'id': 'ae2899e124140b', 'id': 'ae2899e124140b',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Big Buck Bunny', 'title': 'Big Buck Bunny',
'description': ':) pentru copilul din noi', 'description': ':) pentru copilul din noi',
'uploader_id': 'chipy',
'upload_date': '20120304',
'timestamp': 1330830647,
'uploader': 'chipy',
'view_count': int,
'like_count': int,
'comment_count': int,
}, },
} }, {
'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta',
'md5': '929dfb8729dc71750463af88bbbbf4a4',
'info_dict': {
'id': 'f299710e3c91c5',
'ext': 'mp4',
'title': 'Adena ft. Morreti - Inocenta',
'description': 'pop music',
'uploader_id': 'VEVOmixt',
'upload_date': '20151204',
'uploader': 'VEVOmixt',
'timestamp': 1449187937,
'view_count': int,
'like_count': int,
'comment_count': int,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id)
if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage): age_limit = 0
raise ExtractorError( errors = media_info.get('errors', {})
'This video is not available in your country.', expected=True) if errors.get('friends'):
elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
raise ExtractorError('This video is private.', expected=True) raise ExtractorError('This video is private.', expected=True)
elif errors.get('geoblock'):
raise ExtractorError('This video is not available in your country.', expected=True)
elif errors.get('xxx_unlogged'):
age_limit = 18
flashvars_str = self._search_regex( media_class = media_info.get('class')
r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None) if media_class not in ('video', 'audio'):
raise ExtractorError('not a video or an audio')
if flashvars_str: user = media_info.get('user', {})
flashvars = self._parse_json(flashvars_str, display_id)
thumbnail = media_info.get('cover_url')
if thumbnail:
    # str.format returns a new string and leaves the original untouched,
    # so the result has to be assigned back for the sizing to take effect.
    thumbnail = thumbnail.format(width='1600', height='1200')
# TODO: get correct ext for audio files
stream_type = media_info.get('stream_type')
formats = [{
'url': media_info['href'],
'ext': stream_type,
}]
if media_info.get('is_hd'):
formats.append({
'format_id': 'hd',
'url': media_info['hrefhd'],
'ext': stream_type,
})
if media_class == 'audio':
formats[0]['vcodec'] = 'none'
else: else:
raise ExtractorError( formats[0]['format_id'] = 'sd'
'This page does not contain videos', expected=True)
if flashvars['isMP3'] == 'true':
raise ExtractorError(
'Audio downloads are currently not supported', expected=True)
video_id = flashvars['hash']
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage, default=None)
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
'video-formats2' % flashvars)
format_doc = self._download_xml(
format_url, video_id,
note='Downloading formats',
errnote='Error while downloading formats')
video_url_template = (
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
'&source=site&hash=%(hash)s&username=%(userid)s&'
'key=ministhebest&format=%%s&sig=&exp=' %
flashvars)
formats = [
{
'format_id': fnode.text.partition('-')[2],
'url': video_url_template % fnode.text,
'ext': fnode.text.partition('-')[0]
}
for fnode in format_doc.findall('./formats/format')
]
return { return {
'id': video_id, 'id': media_info['identifier'].split('|')[1],
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'title': title, 'title': media_info['title'],
'description': description, 'description': media_info.get('description'),
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader_id': user.get('username'),
'uploader': user.get('fullname'),
'timestamp': parse_iso8601(media_info.get('published'), ' '),
'duration': int_or_none(media_info.get('duration')),
'view_count': int_or_none(media_info.get('count_views')),
'like_count': int_or_none(media_info.get('count_likes')),
'comment_count': int_or_none(media_info.get('count_comments')),
'age_limit': age_limit,
} }

View File

@ -15,6 +15,7 @@ class ViceIE(InfoExtractor):
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
'ext': 'mp4', 'ext': 'mp4',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
'duration': 725.983,
}, },
'params': { 'params': {
# Requires ffmpeg (m3u8 manifest) # Requires ffmpeg (m3u8 manifest)

View File

@ -1,26 +0,0 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class VideoWeedIE(NovaMovIE):
    """NovaMov-based extractor configured for the VideoWeed hosts."""

    IE_NAME = 'videoweed'
    IE_DESC = 'VideoWeed'

    # Raw string: '\.' in a regular literal is an invalid escape sequence.
    # The resulting pattern text is unchanged.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}

    _HOST = 'www.videoweed.es'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'

    _TEST = {
        'url': 'http://www.videoweed.es/file/b42178afbea14',
        'md5': 'abd31a2132947262c50429e1d16c1bfd',
        'info_dict': {
            'id': 'b42178afbea14',
            'ext': 'flv',
            'title': 'optical illusion dissapeared image magic illusion',
            'description': ''
        },
    }

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.11.27.1' __version__ = '2015.12.06'