diff --git a/README.md b/README.md
index df419abe8..0b224f0be 100644
--- a/README.md
+++ b/README.md
@@ -319,7 +319,7 @@ which means you can modify it, redistribute it or use it however you like.
--all-formats Download all available video formats
--prefer-free-formats Prefer free video formats unless a specific
one is requested
- -F, --list-formats List all available formats of specified
+ -F, --list-formats List all available formats of requested
videos
--youtube-skip-dash-manifest Do not download the DASH manifests and
related data on YouTube videos
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 1df408610..bf26fecd7 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -15,8 +15,12 @@
- **abc.net.au**
- **Abc7News**
- **AcademicEarth:Course**
+ - **acast**
+ - **acast:channel**
- **AddAnime**
- **AdobeTV**
+ - **AdobeTVChannel**
+ - **AdobeTVShow**
- **AdobeTVVideo**
- **AdultSwim**
- **Aftenposten**
@@ -43,6 +47,7 @@
- **arte.tv:future**
- **AtresPlayer**
- **ATTTechChannel**
+ - **AudiMedia**
- **audiomack**
- **audiomack:album**
- **Azubu**
@@ -92,6 +97,7 @@
- **Clipfish**
- **cliphunter**
- **Clipsyndicate**
+ - **cloudtime**: CloudTime
- **Cloudy**
- **Clubic**
- **Clyp**
@@ -183,6 +189,7 @@
- **freespeech.org**
- **FreeVideo**
- **FunnyOrDie**
+ - **GameInformer**
- **Gamekings**
- **GameOne**
- **gameone:playlist**
@@ -307,7 +314,6 @@
- **MovieClips**
- **MovieFap**
- **Moviezine**
- - **movshare**: MovShare
- **MPORA**
- **MSNBC**
- **MTV**
@@ -480,6 +486,8 @@
- **Shared**: shared.sx and vivo.sx
- **ShareSix**
- **Sina**
+ - **skynewsarabia:video**
+ - **skynewsarabia:article**
- **Slideshare**
- **Slutload**
- **smotri**: Smotri.com
@@ -665,6 +673,7 @@
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **Weibo**
+ - **wholecloud**: WholeCloud
- **Wimp**
- **Wistia**
- **WNL**
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 947b83683..3db5cd6d9 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -3,9 +3,15 @@ from __future__ import unicode_literals
from .abc import ABCIE
from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
from .addanime import AddAnimeIE
from .adobetv import (
AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
@@ -38,6 +44,7 @@ from .arte import (
)
from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
+from .audimedia import AudiMediaIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
from .azubu import AzubuIE
from .baidu import BaiduVideoIE
@@ -200,6 +207,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funnyordie import FunnyOrDieIE
+from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
from .gameone import (
GameOneIE,
@@ -349,7 +357,6 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
-from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
@@ -415,7 +422,13 @@ from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nova import NovaIE
-from .novamov import NovaMovIE
+from .novamov import (
+ NovaMovIE,
+ WholeCloudIE,
+ NowVideoIE,
+ VideoWeedIE,
+ CloudTimeIE,
+)
from .nowness import (
NownessIE,
NownessPlaylistIE,
@@ -425,7 +438,6 @@ from .nowtv import (
NowTVIE,
NowTVListIE,
)
-from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
NPOLiveIE,
@@ -554,6 +566,10 @@ from .shahid import ShahidIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
from .smotri import (
@@ -732,7 +748,6 @@ from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
-from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py
new file mode 100644
index 000000000..be7913bc7
--- /dev/null
+++ b/youtube_dl/extractor/acast.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class ACastBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://www.acast.com/api/'
+
+
+class ACastIE(ACastBaseIE):
+ IE_NAME = 'acast'
+ _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
+ _TEST = {
+ 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
+ 'md5': 'ada3de5a1e3a2a381327d749854788bb',
+ 'info_dict': {
+ 'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
+ 'ext': 'mp3',
+ 'title': '"Where Are You?": Taipei 101, Taiwan',
+ 'timestamp': 1196172000000,
+ 'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
+ 'duration': 211,
+ }
+ }
+
+ def _real_extract(self, url):
+ channel, display_id = re.match(self._VALID_URL, url).groups()
+ cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
+
+ return {
+ 'id': compat_str(cast_data['id']),
+ 'display_id': display_id,
+ 'url': cast_data['blings'][0]['audio'],
+ 'title': cast_data['name'],
+ 'description': cast_data.get('description'),
+ 'thumbnail': cast_data.get('image'),
+ 'timestamp': int_or_none(cast_data.get('publishingDate')),
+ 'duration': int_or_none(cast_data.get('duration')),
+ }
+
+
+class ACastChannelIE(ACastBaseIE):
+ IE_NAME = 'acast:channel'
+ _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
+ _TEST = {
+ 'url': 'https://www.acast.com/condenasttraveler',
+ 'info_dict': {
+ 'id': '50544219-29bb-499e-a083-6087f4cb7797',
+ 'title': 'Condé Nast Traveler Podcast',
+ 'description': 'md5:98646dee22a5b386626ae31866638fbd',
+ },
+ 'playlist_mincount': 20,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
+ casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
+ entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
+
+ return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 5e43adc51..8753ee2cf 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -1,23 +1,32 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
+ int_or_none,
float_or_none,
ISO639Utils,
+ determine_ext,
)
-class AdobeTVIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
+class AdobeTVBaseIE(InfoExtractor):
+ _API_BASE_URL = 'http://tv.adobe.com/api/v4/'
+
+
+class AdobeTVIE(AdobeTVBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TEST = {
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
'info_dict': {
- 'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
+ 'id': '10981',
'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
@@ -29,50 +38,106 @@ class AdobeTVIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
- player = self._parse_json(
- self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
- video_id)
-
- title = player.get('title') or self._search_regex(
- r'data-title="([^"]+)"', webpage, 'title')
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- upload_date = unified_strdate(
- self._html_search_meta('datepublished', webpage, 'upload date'))
-
- duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration') or
- self._search_regex(
- r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
- webpage, 'duration', fatal=False))
-
- view_count = str_to_int(self._search_regex(
- r'\s*Views?:\s*([\d,.]+)\s*
',
- webpage, 'view count'))
+ video_data = self._download_json(
+ self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
+ urlname)['data'][0]
formats = [{
- 'url': source['src'],
- 'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
- 'tbr': source.get('bitrate'),
- } for source in player['sources']]
+ 'url': source['url'],
+ 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ } for source in video_data['videos']]
self._sort_formats(formats)
return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'duration': duration,
- 'view_count': view_count,
+ 'id': compat_str(video_data['id']),
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
'formats': formats,
}
+class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
+ def _parse_page_data(self, page_data):
+ return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
+
+ def _extract_playlist_entries(self, url, display_id):
+ page = self._download_json(url, display_id)
+ entries = self._parse_page_data(page['data'])
+ for page_num in range(2, page['paging']['pages'] + 1):
+ entries.extend(self._parse_page_data(
+ self._download_json(url + '&page=%d' % page_num, display_id)['data']))
+ return entries
+
+
+class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
+ 'info_dict': {
+ 'id': '36',
+ 'title': 'The Complete Picture with Julieanne Kost',
+ 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
+ },
+ 'playlist_mincount': 136,
+ }
+
+ def _get_element_url(self, element_data):
+ return element_data['urls'][0]
+
+ def _real_extract(self, url):
+ language, show_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = 'language=%s&show_urlname=%s' % (language, show_urlname)
+
+ show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
+
+ return self.playlist_result(
+ self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
+ compat_str(show_data['id']),
+ show_data['show_name'],
+ show_data['show_description'])
+
+
+class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/channel/development',
+ 'info_dict': {
+ 'id': 'development',
+ },
+ 'playlist_mincount': 96,
+ }
+
+ def _get_element_url(self, element_data):
+ return element_data['url']
+
+ def _real_extract(self, url):
+ language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
+ if category_urlname:
+ query += '&category_urlname=%s' % category_urlname
+
+ return self.playlist_result(
+ self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
+ channel_urlname)
+
+
class AdobeTVVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
@@ -91,28 +156,25 @@ class AdobeTVVideoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- player_params = self._parse_json(self._search_regex(
- r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
- video_id)
+ video_data = self._download_json(url + '?format=json', video_id)
formats = [{
+ 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
'url': source['src'],
- 'width': source.get('width'),
- 'height': source.get('height'),
- 'tbr': source.get('bitrate'),
- } for source in player_params['sources']]
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('bitrate')),
+ } for source in video_data['sources']]
+ self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000)
- for source in player_params['sources']]))
+ for source in video_data['sources']]))
subtitles = {}
- for translation in player_params.get('translations', []):
+ for translation in video_data.get('translations', []):
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
if lang_id not in subtitles:
subtitles[lang_id] = []
@@ -124,8 +186,9 @@ class AdobeTVVideoIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
- 'title': player_params['title'],
- 'description': self._og_search_description(webpage),
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data['video'].get('poster'),
'duration': duration,
'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py
new file mode 100644
index 000000000..b0b089dee
--- /dev/null
+++ b/youtube_dl/extractor/audimedia.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class AudiMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
+ 'md5': '79a8b71c46d49042609795ab59779b66',
+ 'info_dict': {
+ 'id': '1564',
+ 'ext': 'mp4',
+ 'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
+ 'description': 'md5:60e5d30a78ced725f7b8d34370762941',
+ 'upload_date': '20151124',
+ 'timestamp': 1448354940,
+ 'duration': 74022,
+ 'view_count': int,
+ }
+ }
+ # extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
+ _AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ raw_payload = self._search_regex(r'', embed_page,
+ 'embed vars')
+ info = self._parse_json(embed_vars_json, video_id)
+
+ formats = []
+ for media in info['media']:
+ if media['mediaPurpose'] == 'play':
+ formats.append({
+ 'url': media['uri'],
+ 'height': media['height'],
+ 'width': media['width'],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': info.get('thumbUri'),
+ 'description': self._og_search_description(webpage),
+ 'duration': int_or_none(info.get('videoLengthInSeconds')),
+ 'age_limit': parse_age_limit(info.get('audienceRating')),
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 5075d131e..e3bdff2d8 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -339,6 +339,7 @@ class GenericIE(InfoExtractor):
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
},
'add_ie': ['Ooyala'],
},
@@ -350,6 +351,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
},
'params': {
'skip_download': True,
@@ -960,8 +962,9 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
- 'description': 'VIDEO: Index/Match versus VLOOKUP.',
+ 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
+ 'duration': 191.933,
},
'params': {
# m3u8 downloads
@@ -1501,7 +1504,7 @@ class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(mobj.group('ec'))
+ return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@ -1509,7 +1512,7 @@ class GenericIE(InfoExtractor):
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
- embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+ embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'
'
- _DESCRIPTION_REGEX = r'Description: ([^<]+)'
-
- _TEST = {
- 'url': 'http://www.movshare.net/video/559e28be54d96',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': '559e28be54d96',
- 'ext': 'flv',
- 'title': 'dissapeared image',
- 'description': 'optical illusion dissapeared image magic illusion',
- }
- }
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index 944096e1c..7c6b7841d 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -1,63 +1,102 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
- remove_end,
parse_duration,
+ int_or_none,
+ xpath_text,
+ xpath_attr,
)
class NBAIE(InfoExtractor):
- _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+ _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
- 'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
+ 'md5': '9e7729d3010a9c71506fd1248f74e4f4',
'info_dict': {
- 'id': '0021200253-okc-bkn-recap.nba',
- 'ext': 'mp4',
+ 'id': '0021200253-okc-bkn-recap',
+ 'ext': 'flv',
'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
+ 'timestamp': 1354638466,
+ 'upload_date': '20121204',
},
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
}, {
- 'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
'info_dict': {
- 'id': '0041400301-cle-atl-recap.nba',
+ 'id': '0041400301-cle-atl-recap',
'ext': 'mp4',
- 'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
+ 'title': 'Hawks vs. Cavaliers Game 1',
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
'duration': 228,
- },
- 'params': {
- 'skip_download': True,
+ 'timestamp': 1432134543,
+ 'upload_date': '20150520',
}
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ if path.startswith('nba/'):
+ path = path[3:]
+ video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
+ video_id = xpath_text(video_info, 'slug')
+ title = xpath_text(video_info, 'headline')
+ description = xpath_text(video_info, 'description')
+ duration = parse_duration(xpath_text(video_info, 'length'))
+ timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
- video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+ thumbnails = []
+ for image in video_info.find('images'):
+ thumbnails.append({
+ 'id': image.attrib.get('cut'),
+ 'url': image.text,
+ 'width': int_or_none(image.attrib.get('width')),
+ 'height': int_or_none(image.attrib.get('height')),
+ })
- shortened_video_id = video_id.rpartition('/')[2]
- title = remove_end(
- self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
-
- description = self._og_search_description(webpage)
- duration_str = self._html_search_meta(
- 'duration', webpage, 'duration', default=None)
- if not duration_str:
- duration_str = self._html_search_regex(
- r'Duration:\s*(\d+:\d+)', webpage, 'duration', fatal=False)
- duration = parse_duration(duration_str)
+ formats = []
+ for video_file in video_info.findall('.//file'):
+ video_url = video_file.text
+ if video_url.startswith('/'):
+ continue
+ if video_url.endswith('.m3u8'):
+ m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif video_url.endswith('.f4m'):
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ else:
+ key = video_file.attrib.get('bitrate')
+ format_info = {
+ 'format_id': key,
+ 'url': video_url,
+ }
+ mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
+ if mobj:
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': int_or_none(mobj.group(3)),
+ })
+ formats.append(format_info)
+ self._sort_formats(formats)
return {
- 'id': shortened_video_id,
- 'url': video_url,
+ 'id': video_id,
'title': title,
'description': description,
'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py
index 6163e8855..837c91559 100644
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -92,3 +92,89 @@ class NovaMovIE(InfoExtractor):
'title': title,
'description': description
}
+
+
+class WholeCloudIE(NovaMovIE):
+ IE_NAME = 'wholecloud'
+ IE_DESC = 'WholeCloud'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': '(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
+
+ _HOST = 'www.wholecloud.net'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r'Title: ([^<]+)'
+ _DESCRIPTION_REGEX = r'Description: ([^<]+)'
+
+ _TEST = {
+ 'url': 'http://www.wholecloud.net/video/559e28be54d96',
+ 'md5': 'abd31a2132947262c50429e1d16c1bfd',
+ 'info_dict': {
+ 'id': '559e28be54d96',
+ 'ext': 'flv',
+ 'title': 'dissapeared image',
+ 'description': 'optical illusion dissapeared image magic illusion',
+ }
+ }
+
+
+class NowVideoIE(NovaMovIE):
+ IE_NAME = 'nowvideo'
+ IE_DESC = 'NowVideo'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
+
+ _HOST = 'www.nowvideo.to'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
+ _TITLE_REGEX = r'([^<]+)
'
+ _DESCRIPTION_REGEX = r'\s*([^<]+)
'
+
+ _TEST = {
+ 'url': 'http://www.nowvideo.to/video/0mw0yow7b6dxa',
+ 'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
+ 'info_dict': {
+ 'id': '0mw0yow7b6dxa',
+ 'ext': 'flv',
+ 'title': 'youtubedl test video _BaW_jenozKc.mp4',
+ 'description': 'Description',
+ }
+ }
+
+
+class VideoWeedIE(NovaMovIE):
+ IE_NAME = 'videoweed'
+ IE_DESC = 'VideoWeed'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
+
+ _HOST = 'www.videoweed.es'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r'([^<]+)
'
+
+ _TEST = {
+ 'url': 'http://www.videoweed.es/file/b42178afbea14',
+ 'md5': 'abd31a2132947262c50429e1d16c1bfd',
+ 'info_dict': {
+ 'id': 'b42178afbea14',
+ 'ext': 'flv',
+ 'title': 'optical illusion dissapeared image magic illusion',
+ 'description': ''
+ },
+ }
+
+
+class CloudTimeIE(NovaMovIE):
+ IE_NAME = 'cloudtime'
+ IE_DESC = 'CloudTime'
+
+ _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'cloudtime\.to'}
+
+ _HOST = 'www.cloudtime.to'
+
+ _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+ _TITLE_REGEX = r']+class=["\']video_det["\'][^>]*>\s*
([^<]+)'
+
+ _TEST = None
diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py
index 67e34b294..fd107aca2 100644
--- a/youtube_dl/extractor/nowtv.py
+++ b/youtube_dl/extractor/nowtv.py
@@ -71,7 +71,7 @@ class NowTVBaseIE(InfoExtractor):
class NowTVIE(NowTVBaseIE):
- _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
+ _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_TESTS = [{
# rtl
@@ -190,6 +190,9 @@ class NowTVIE(NowTVBaseIE):
}, {
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nowtv.de/rtl2/zuhause-im-glueck/jahr/2015/11/eine-erschuetternde-diagnose/player',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py
deleted file mode 100644
index 57ee3d366..000000000
--- a/youtube_dl/extractor/nowvideo.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from __future__ import unicode_literals
-
-from .novamov import NovaMovIE
-
-
-class NowVideoIE(NovaMovIE):
- IE_NAME = 'nowvideo'
- IE_DESC = 'NowVideo'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
-
- _HOST = 'www.nowvideo.to'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
- _TITLE_REGEX = r'([^<]+)
'
- _DESCRIPTION_REGEX = r'\s*([^<]+)
'
-
- _TEST = {
- 'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
- 'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
- 'info_dict': {
- 'id': '0mw0yow7b6dxa',
- 'ext': 'flv',
- 'title': 'youtubedl test video _BaW_jenozKc.mp4',
- 'description': 'Description',
- }
- }
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index a262a9f6d..35067e271 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -1,108 +1,69 @@
from __future__ import unicode_literals
import re
-import json
import base64
from .common import InfoExtractor
from ..utils import (
- unescapeHTML,
- ExtractorError,
- determine_ext,
int_or_none,
+ float_or_none,
+ ExtractorError,
+ unsmuggle_url,
)
+from ..compat import compat_urllib_parse
class OoyalaBaseIE(InfoExtractor):
- def _extract_result(self, info, more_info):
- embedCode = info['embedCode']
- video_url = info.get('ipad_url') or info['url']
-
- if determine_ext(video_url) == 'm3u8':
- formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
- else:
- formats = [{
- 'url': video_url,
- 'ext': 'mp4',
- }]
-
- return {
- 'id': embedCode,
- 'title': unescapeHTML(info['title']),
- 'formats': formats,
- 'description': unescapeHTML(more_info['description']),
- 'thumbnail': more_info['promo'],
+ def _extract(self, content_tree_url, video_id, domain='example.org'):
+ content_tree = self._download_json(content_tree_url, video_id)['content_tree']
+ metadata = content_tree[list(content_tree)[0]]
+ embed_code = metadata['embed_code']
+ pcode = metadata.get('asset_pcode') or embed_code
+ video_info = {
+ 'id': embed_code,
+ 'title': metadata['title'],
+ 'description': metadata.get('description'),
+ 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+ 'duration': float_or_none(metadata.get('duration'), 1000),
}
- def _extract(self, player_url, video_id):
- player = self._download_webpage(player_url, video_id)
- mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
- player, 'mobile player url')
- # Looks like some videos are only available for particular devices
- # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
- # is only available for ipad)
- # Working around with fetching URLs for all the devices found starting with 'unknown'
- # until we succeed or eventually fail for each device.
- devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
- devices.remove('unknown')
- devices.insert(0, 'unknown')
- for device in devices:
- mobile_player = self._download_webpage(
- '%s&device=%s' % (mobile_url, device), video_id,
- 'Downloading mobile player JS for %s device' % device)
- videos_info = self._search_regex(
- r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
- mobile_player, 'info', fatal=False, default=None)
- if videos_info:
- break
-
- if not videos_info:
- formats = []
+ formats = []
+ for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
auth_data = self._download_json(
- 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id),
- video_id)
+ 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}),
+ video_id, 'Downloading %s JSON' % supported_format)
- cur_auth_data = auth_data['authorization_data'][video_id]
+ cur_auth_data = auth_data['authorization_data'][embed_code]
- for stream in cur_auth_data['streams']:
- formats.append({
- 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
- 'ext': stream.get('delivery_type'),
- 'format': stream.get('video_codec'),
- 'format_id': stream.get('profile'),
- 'width': int_or_none(stream.get('width')),
- 'height': int_or_none(stream.get('height')),
- 'abr': int_or_none(stream.get('audio_bitrate')),
- 'vbr': int_or_none(stream.get('video_bitrate')),
- })
- if formats:
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': 'Ooyala video',
- }
+ if cur_auth_data['authorized']:
+ for stream in cur_auth_data['streams']:
+ url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
+ delivery_type = stream['delivery_type']
+ if delivery_type == 'remote_asset':
+ video_info['url'] = url
+ return video_info
+ if delivery_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif delivery_type == 'hds':
+ formats.extend(self._extract_f4m_formats(url, embed_code, -1, 'hds', fatal=False))
+ else:
+ formats.append({
+ 'url': url,
+ 'ext': stream.get('delivery_type'),
+ 'vcodec': stream.get('video_codec'),
+ 'format_id': '%s-%s-%sp' % (stream.get('profile'), delivery_type, stream.get('height')),
+ 'width': int_or_none(stream.get('width')),
+ 'height': int_or_none(stream.get('height')),
+ 'abr': int_or_none(stream.get('audio_bitrate')),
+ 'vbr': int_or_none(stream.get('video_bitrate')),
+ 'fps': float_or_none(stream.get('framerate')),
+ })
+ else:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True)
+ self._sort_formats(formats)
- if not cur_auth_data['authorized']:
- raise ExtractorError(cur_auth_data['message'], expected=True)
-
- if not videos_info:
- raise ExtractorError('Unable to extract info')
- videos_info = videos_info.replace('\\"', '"')
- videos_more_info = self._search_regex(
- r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
- videos_info = json.loads(videos_info)
- videos_more_info = json.loads(videos_more_info)
-
- if videos_more_info.get('lineup'):
- videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'title': unescapeHTML(videos_more_info['title']),
- 'entries': videos,
- }
- else:
- return self._extract_result(videos_info[0], videos_more_info)
+ video_info['formats'] = formats
+ return video_info
class OoyalaIE(OoyalaBaseIE):
@@ -117,6 +78,7 @@ class OoyalaIE(OoyalaBaseIE):
'ext': 'mp4',
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ 'duration': 853.386,
},
}, {
# Only available for ipad
@@ -125,7 +87,7 @@ class OoyalaIE(OoyalaBaseIE):
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'ext': 'mp4',
'title': 'Simulation Overview - Levels of Simulation',
- 'description': '',
+ 'duration': 194.948,
},
},
{
@@ -136,7 +98,8 @@ class OoyalaIE(OoyalaBaseIE):
'info_dict': {
'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
'ext': 'mp4',
- 'title': 'Ooyala video',
+ 'title': 'Divide Tool Path.mp4',
+ 'duration': 204.405,
}
}
]
@@ -151,9 +114,11 @@ class OoyalaIE(OoyalaBaseIE):
ie=cls.ie_key())
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url)
- player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
- return self._extract(player_url, embed_code)
+ domain = smuggled_data.get('domain')
+ content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code)
+ return self._extract(content_tree_url, embed_code, domain)
class OoyalaExternalIE(OoyalaBaseIE):
@@ -170,7 +135,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
.*?&pcode=
)
(?P<pcode>.+?)
- (&|$)
+ (?:&|$)
'''
_TEST = {
@@ -179,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4',
'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
- 'description': '',
+ 'duration': 1302000,
},
'params': {
# m3u8 download
@@ -188,9 +153,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- partner_id = mobj.group('partner_id')
- video_id = mobj.group('id')
- pcode = mobj.group('pcode')
- player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode)
- return self._extract(player_url, video_id)
+ partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
+ content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id)
+ return self._extract(content_tree_url, video_id)
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index aa7dbcb63..55c11b3bf 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import re
import json
import random
import collections
@@ -14,6 +15,7 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
+ qualities,
sanitized_Request,
)
@@ -140,15 +142,28 @@ class PluralsightIE(PluralsightBaseIE):
'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640},
'high': {'width': 1024, 'height': 768},
+ 'high-widescreen': {'width': 1280, 'height': 720},
}
+ QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+ quality_key = qualities(QUALITIES_PREFERENCE)
+
AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
ALLOWED_QUALITIES = (
- AllowedQuality('webm', ('high',)),
- AllowedQuality('mp4', ('low', 'medium', 'high',)),
+ AllowedQuality('webm', ['high', ]),
+ AllowedQuality('mp4', ['low', 'medium', 'high', ]),
)
+ # Some courses also offer widescreen resolution for high quality (see
+ # https://github.com/rg3/youtube-dl/issues/7766)
+ widescreen = True if re.search(
+ r'courseSupportsWidescreenVideoFormats\s*:\s*true', webpage) else False
+ best_quality = 'high-widescreen' if widescreen else 'high'
+ if widescreen:
+ for allowed_quality in ALLOWED_QUALITIES:
+ allowed_quality.qualities.append(best_quality)
+
# In order to minimize the number of calls to ViewClip API and reduce
# the probability of being throttled or banned by Pluralsight we will request
# only single format until formats listing was explicitly requested.
@@ -157,19 +172,19 @@ class PluralsightIE(PluralsightBaseIE):
else:
def guess_allowed_qualities():
req_format = self._downloader.params.get('format') or 'best'
- req_format_split = req_format.split('-')
+ req_format_split = req_format.split('-', 1)
if len(req_format_split) > 1:
req_ext, req_quality = req_format_split
for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), )
req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
- return (AllowedQuality(req_ext, ('high', )), )
+ return (AllowedQuality(req_ext, (best_quality, )), )
allowed_qualities = guess_allowed_qualities()
formats = []
- for ext, qualities in allowed_qualities:
- for quality in qualities:
+ for ext, qualities_ in allowed_qualities:
+ for quality in qualities_:
f = QUALITIES[quality].copy()
clip_post = {
'a': author,
@@ -205,6 +220,7 @@ class PluralsightIE(PluralsightBaseIE):
'url': clip_url,
'ext': ext,
'format_id': format_id,
+ 'quality': quality_key(quality),
})
formats.append(f)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dl/extractor/skynewsarabia.py
new file mode 100644
index 000000000..05e1b02ad
--- /dev/null
+++ b/youtube_dl/extractor/skynewsarabia.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_iso8601,
+ parse_duration,
+)
+
+
+class SkyNewsArabiaBaseIE(InfoExtractor):
+ _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images'
+
+ def _call_api(self, path, value):
+ return self._download_json('http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value), value)
+
+ def _get_limelight_media_id(self, url):
+ return self._search_regex(r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id')
+
+ def _get_image_url(self, image_path_template, width='1600', height='1200'):
+ return self._IMAGE_BASE_URL + image_path_template.format(width=width, height=height)
+
+ def _extract_video_info(self, video_data):
+ video_id = compat_str(video_data['id'])
+ topic = video_data.get('topicTitle')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'limelight:media:%s' % self._get_limelight_media_id(video_data['videoUrl'][0]['url']),
+ 'id': video_id,
+ 'title': video_data['headline'],
+ 'description': video_data.get('summary'),
+ 'thumbnail': self._get_image_url(video_data['mediaAsset']['imageUrl']),
+ 'timestamp': parse_iso8601(video_data.get('date')),
+ 'duration': parse_duration(video_data.get('runTime')),
+ 'tags': video_data.get('tags', []),
+ 'categories': [topic] if topic else [],
+ 'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id,
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
+ IE_NAME = 'skynewsarabia:video'
+ _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.skynewsarabia.com/web/video/794902/%D9%86%D8%B5%D9%81-%D9%85%D9%84%D9%8A%D9%88%D9%86-%D9%85%D8%B5%D8%A8%D8%A7%D8%AD-%D8%B4%D8%AC%D8%B1%D8%A9-%D9%83%D8%B1%D9%8A%D8%B3%D9%85%D8%A7%D8%B3',
+ 'info_dict': {
+ 'id': '794902',
+ 'ext': 'flv',
+ 'title': 'نصف مليون مصباح على شجرة كريسماس',
+ 'description': 'md5:22f1b27f0850eeb10c7e59b1f16eb7c6',
+ 'upload_date': '20151128',
+ 'timestamp': 1448697198,
+ 'duration': 2119,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._call_api('video', video_id)
+ return self._extract_video_info(video_data)
+
+
+class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
+ IE_NAME = 'skynewsarabia:article'
+ _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',
+ 'info_dict': {
+ 'id': '794549',
+ 'ext': 'flv',
+ 'title': 'بالفيديو.. ألعاب ذكية تحاكي واقع المنطقة',
+ 'description': 'md5:0c373d29919a851e080ee4edd0c5d97f',
+ 'upload_date': '20151126',
+ 'timestamp': 1448559336,
+ 'duration': 281.6,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.skynewsarabia.com/web/article/794844/%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D9%82%D9%88%D8%A7%D8%B1%D8%A8-%D8%A7%D9%94%D8%B3%D9%84%D8%AD%D8%A9-%D9%84%D9%85%D9%8A%D9%84%D9%8A%D8%B4%D9%8A%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D9%88%D8%AB%D9%8A-%D9%88%D8%B5%D8%A7%D9%84%D8%AD',
+ 'info_dict': {
+ 'id': '794844',
+ 'title': 'إحباط تهريب أسلحة لميليشيات الحوثي وصالح بجنوب اليمن',
+ 'description': 'md5:5c927b8b2e805796e7f693538d96fc7e',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ article_data = self._call_api('article', article_id)
+ media_asset = article_data['mediaAsset']
+ if media_asset['type'] == 'VIDEO':
+ topic = article_data.get('topicTitle')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']),
+ 'id': article_id,
+ 'title': article_data['headline'],
+ 'description': article_data.get('summary'),
+ 'thumbnail': self._get_image_url(media_asset['imageUrl']),
+ 'timestamp': parse_iso8601(article_data.get('date')),
+ 'tags': article_data.get('tags', []),
+ 'categories': [topic] if topic else [],
+ 'webpage_url': url,
+ 'ie_key': 'LimelightMedia',
+ }
+ entries = [self._extract_video_info(item) for item in article_data.get('inlineItems', []) if item['type'] == 'VIDEO']
+ return self.playlist_result(entries, article_id, article_data['headline'], article_data.get('summary'))
diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py
index 77eec0bc7..16e1bf2d6 100644
--- a/youtube_dl/extractor/srf.py
+++ b/youtube_dl/extractor/srf.py
@@ -11,7 +11,7 @@ from ..utils import (
class SrfIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
+ _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/(?:tv|radio)/[^/]+/(?P<media_type>video|audio)/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'md5': '4cd93523723beff51bb4bee974ee238d',
@@ -35,6 +35,20 @@ class SrfIE(InfoExtractor):
'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
'timestamp': 1373493600,
},
+ }, {
+ 'url': 'http://www.srf.ch/play/radio/hoerspielarchiv-srf-musikwelle/audio/saegel-ohni-wind-von-jakob-stebler?id=415bf3d3-6429-4de7-968d-95866e37cfbc',
+ 'md5': '',
+ 'info_dict': {
+ 'id': '415bf3d3-6429-4de7-968d-95866e37cfbc',
+ 'display_id': 'saegel-ohni-wind-von-jakob-stebler',
+ 'ext': 'mp3',
+ 'upload_date': '20080518',
+ 'title': '«Sägel ohni Wind» von Jakob Stebler',
+ 'timestamp': 1211112000,
+ },
+ 'params': {
+ 'skip_download': True, # requires rtmpdump
+ },
}, {
'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
'only_matching': True,
@@ -44,11 +58,13 @@ class SrfIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- display_id = re.match(self._VALID_URL, url).group('display_id') or video_id
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ media_type = mobj.group('media_type')
+ display_id = mobj.group('display_id') or video_id
video_data = self._download_xml(
- 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
+ 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/%s/play/%s.xml' % (media_type, video_id),
display_id)
title = xpath_text(
@@ -64,7 +80,7 @@ class SrfIE(InfoExtractor):
for url_node in item.findall('url'):
quality = url_node.attrib['quality']
full_url = url_node.text
- original_ext = determine_ext(full_url)
+ original_ext = determine_ext(full_url).lower()
format_id = '%s-%s' % (quality, item.attrib['protocol'])
if original_ext == 'f4m':
formats.extend(self._extract_f4m_formats(
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
index 117afa9bf..e0477382c 100644
--- a/youtube_dl/extractor/teachingchannel.py
+++ b/youtube_dl/extractor/teachingchannel.py
@@ -16,6 +16,7 @@ class TeachingChannelIE(InfoExtractor):
'ext': 'mp4',
'title': 'A History of Teaming',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422.255,
},
'params': {
# m3u8 download
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
index 185accc4b..a800449e9 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -1,80 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
class TriluliluIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
- _TEST = {
- 'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
- 'md5': 'c1450a00da251e2769b74b9005601cac',
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?trilulilu\.ro/(?:[^/]+/)?(?P<id>[^/#\?]+)'
+ _TESTS = [{
+ 'url': 'http://www.trilulilu.ro/big-buck-bunny-1',
+ 'md5': '68da087b676a6196a413549212f60cc6',
'info_dict': {
'id': 'ae2899e124140b',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': ':) pentru copilul din noi',
+ 'uploader_id': 'chipy',
+ 'upload_date': '20120304',
+ 'timestamp': 1330830647,
+ 'uploader': 'chipy',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
},
- }
+ }, {
+ 'url': 'http://www.trilulilu.ro/adena-ft-morreti-inocenta',
+ 'md5': '929dfb8729dc71750463af88bbbbf4a4',
+ 'info_dict': {
+ 'id': 'f299710e3c91c5',
+ 'ext': 'mp4',
+ 'title': 'Adena ft. Morreti - Inocenta',
+ 'description': 'pop music',
+ 'uploader_id': 'VEVOmixt',
+ 'upload_date': '20151204',
+ 'uploader': 'VEVOmixt',
+ 'timestamp': 1449187937,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ media_info = self._download_json('http://m.trilulilu.ro/%s?format=json' % display_id, display_id)
- if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
- raise ExtractorError(
- 'This video is not available in your country.', expected=True)
- elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
+ age_limit = 0
+ errors = media_info.get('errors', {})
+ if errors.get('friends'):
raise ExtractorError('This video is private.', expected=True)
+ elif errors.get('geoblock'):
+ raise ExtractorError('This video is not available in your country.', expected=True)
+ elif errors.get('xxx_unlogged'):
+ age_limit = 18
- flashvars_str = self._search_regex(
- r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
+ media_class = media_info.get('class')
+ if media_class not in ('video', 'audio'):
+ raise ExtractorError('not a video or an audio')
- if flashvars_str:
- flashvars = self._parse_json(flashvars_str, display_id)
+ user = media_info.get('user', {})
+
+ thumbnail = media_info.get('cover_url')
+ if thumbnail:
+ thumbnail = thumbnail.format(width='1600', height='1200')
+
+ # TODO: get correct ext for audio files
+ stream_type = media_info.get('stream_type')
+ formats = [{
+ 'url': media_info['href'],
+ 'ext': stream_type,
+ }]
+ if media_info.get('is_hd'):
+ formats.append({
+ 'format_id': 'hd',
+ 'url': media_info['hrefhd'],
+ 'ext': stream_type,
+ })
+ if media_class == 'audio':
+ formats[0]['vcodec'] = 'none'
else:
- raise ExtractorError(
- 'This page does not contain videos', expected=True)
-
- if flashvars['isMP3'] == 'true':
- raise ExtractorError(
- 'Audio downloads are currently not supported', expected=True)
-
- video_id = flashvars['hash']
- title = self._og_search_title(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage, default=None)
-
- format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
- 'video-formats2' % flashvars)
- format_doc = self._download_xml(
- format_url, video_id,
- note='Downloading formats',
- errnote='Error while downloading formats')
-
- video_url_template = (
- 'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
- '&source=site&hash=%(hash)s&username=%(userid)s&'
- 'key=ministhebest&format=%%s&sig=&exp=' %
- flashvars)
- formats = [
- {
- 'format_id': fnode.text.partition('-')[2],
- 'url': video_url_template % fnode.text,
- 'ext': fnode.text.partition('-')[0]
- }
-
- for fnode in format_doc.findall('./formats/format')
- ]
+ formats[0]['format_id'] = 'sd'
return {
- 'id': video_id,
+ 'id': media_info['identifier'].split('|')[1],
'display_id': display_id,
'formats': formats,
- 'title': title,
- 'description': description,
+ 'title': media_info['title'],
+ 'description': media_info.get('description'),
'thumbnail': thumbnail,
+ 'uploader_id': user.get('username'),
+ 'uploader': user.get('fullname'),
+ 'timestamp': parse_iso8601(media_info.get('published'), ' '),
+ 'duration': int_or_none(media_info.get('duration')),
+ 'view_count': int_or_none(media_info.get('count_views')),
+ 'like_count': int_or_none(media_info.get('count_likes')),
+ 'comment_count': int_or_none(media_info.get('count_comments')),
+ 'age_limit': age_limit,
}
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 01af7a995..3db6286e4 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -15,6 +15,7 @@ class ViceIE(InfoExtractor):
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
'ext': 'mp4',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ 'duration': 725.983,
},
'params': {
# Requires ffmpeg (m3u8 manifest)
diff --git a/youtube_dl/extractor/videoweed.py b/youtube_dl/extractor/videoweed.py
deleted file mode 100644
index ca2e50935..000000000
--- a/youtube_dl/extractor/videoweed.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from __future__ import unicode_literals
-
-from .novamov import NovaMovIE
-
-
-class VideoWeedIE(NovaMovIE):
- IE_NAME = 'videoweed'
- IE_DESC = 'VideoWeed'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
-
- _HOST = 'www.videoweed.es'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'([^<]+)
'
-
- _TEST = {
- 'url': 'http://www.videoweed.es/file/b42178afbea14',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': 'b42178afbea14',
- 'ext': 'flv',
- 'title': 'optical illusion dissapeared image magic illusion',
- 'description': ''
- },
- }
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index bd0de9f53..a4e9d7072 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.11.27.1'
+__version__ = '2015.12.06'