diff --git a/README.md b/README.md
index df419abe8..e39f71281 100644
--- a/README.md
+++ b/README.md
@@ -319,7 +319,7 @@ which means you can modify it, redistribute it or use it however you like.
--all-formats Download all available video formats
--prefer-free-formats Prefer free video formats unless a specific
one is requested
- -F, --list-formats List all available formats of specified
+ -F, --list-formats List all available formats of requested
videos
--youtube-skip-dash-manifest Do not download the DASH manifests and
related data on YouTube videos
@@ -800,7 +800,21 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.
-**Please include the full output of youtube-dl when run with `-v`**.
+**Please include the full output of youtube-dl when run with `-v`**, i.e. add the `-v` flag to your command line, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+```
+$ youtube-dl -v http://www.youtube.com/watch?v=BaW_jenozKcj
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] youtube-dl version 2015.12.06
+[debug] Git HEAD: 135392e
+[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+```
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 1df408610..bf26fecd7 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -15,8 +15,12 @@
- **abc.net.au**
- **Abc7News**
- **AcademicEarth:Course**
+ - **acast**
+ - **acast:channel**
- **AddAnime**
- **AdobeTV**
+ - **AdobeTVChannel**
+ - **AdobeTVShow**
- **AdobeTVVideo**
- **AdultSwim**
- **Aftenposten**
@@ -43,6 +47,7 @@
- **arte.tv:future**
- **AtresPlayer**
- **ATTTechChannel**
+ - **AudiMedia**
- **audiomack**
- **audiomack:album**
- **Azubu**
@@ -92,6 +97,7 @@
- **Clipfish**
- **cliphunter**
- **Clipsyndicate**
+ - **cloudtime**: CloudTime
- **Cloudy**
- **Clubic**
- **Clyp**
@@ -183,6 +189,7 @@
- **freespeech.org**
- **FreeVideo**
- **FunnyOrDie**
+ - **GameInformer**
- **Gamekings**
- **GameOne**
- **gameone:playlist**
@@ -307,7 +314,6 @@
- **MovieClips**
- **MovieFap**
- **Moviezine**
- - **movshare**: MovShare
- **MPORA**
- **MSNBC**
- **MTV**
@@ -480,6 +486,8 @@
- **Shared**: shared.sx and vivo.sx
- **ShareSix**
- **Sina**
+ - **skynewsarabia:article**
+ - **skynewsarabia:video**
- **Slideshare**
- **Slutload**
- **smotri**: Smotri.com
@@ -665,6 +673,7 @@
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **Weibo**
+ - **wholecloud**: WholeCloud
- **Wimp**
- **Wistia**
- **WNL**
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 9a8c7da05..c642a1fbf 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1110,6 +1110,12 @@ class YoutubeDL(object):
'contain the video, try using '
'"-f %s+%s"' % (format_2, format_1))
return
+ # Formats must be opposite (video+audio)
+ if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
+ self.report_error(
+ 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
+ % (format_1, format_2))
+ return
output_ext = (
formats_info[0]['ext']
if self.params.get('merge_output_format') is None
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bbf656090..2acebfef6 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -3,9 +3,15 @@ from __future__ import unicode_literals
from .abc import ABCIE
from .abc7news import Abc7NewsIE
from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
from .addanime import AddAnimeIE
from .adobetv import (
AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
@@ -38,6 +44,7 @@ from .arte import (
)
from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
+from .audimedia import AudiMediaIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
from .azubu import AzubuIE
from .baidu import BaiduVideoIE
@@ -200,6 +207,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funnyordie import FunnyOrDieIE
+from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
from .gameone import (
GameOneIE,
@@ -349,7 +357,6 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
-from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
@@ -415,7 +422,13 @@ from .noco import NocoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nova import NovaIE
-from .novamov import NovaMovIE
+from .novamov import (
+ NovaMovIE,
+ WholeCloudIE,
+ NowVideoIE,
+ VideoWeedIE,
+ CloudTimeIE,
+)
from .nowness import (
NownessIE,
NownessPlaylistIE,
@@ -425,7 +438,6 @@ from .nowtv import (
NowTVIE,
NowTVListIE,
)
-from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
NPOLiveIE,
@@ -554,6 +566,10 @@ from .shahid import ShahidIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
from .smotri import (
@@ -733,7 +749,6 @@ from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
-from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py
new file mode 100644
index 000000000..be7913bc7
--- /dev/null
+++ b/youtube_dl/extractor/acast.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class ACastBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://www.acast.com/api/'
+
+
+class ACastIE(ACastBaseIE):
+ IE_NAME = 'acast'
+ _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P[^/]+)/(?P[^/#?]+)'
+ _TEST = {
+ 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
+ 'md5': 'ada3de5a1e3a2a381327d749854788bb',
+ 'info_dict': {
+ 'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
+ 'ext': 'mp3',
+ 'title': '"Where Are You?": Taipei 101, Taiwan',
+ 'timestamp': 1196172000000,
+ 'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
+ 'duration': 211,
+ }
+ }
+
+ def _real_extract(self, url):
+ channel, display_id = re.match(self._VALID_URL, url).groups()
+ cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
+
+ return {
+ 'id': compat_str(cast_data['id']),
+ 'display_id': display_id,
+ 'url': cast_data['blings'][0]['audio'],
+ 'title': cast_data['name'],
+ 'description': cast_data.get('description'),
+ 'thumbnail': cast_data.get('image'),
+ 'timestamp': int_or_none(cast_data.get('publishingDate')),
+ 'duration': int_or_none(cast_data.get('duration')),
+ }
+
+
+class ACastChannelIE(ACastBaseIE):
+ IE_NAME = 'acast:channel'
+ _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P[^/#?]+)'
+ _TEST = {
+ 'url': 'https://www.acast.com/condenasttraveler',
+ 'info_dict': {
+ 'id': '50544219-29bb-499e-a083-6087f4cb7797',
+ 'title': 'Condé Nast Traveler Podcast',
+ 'description': 'md5:98646dee22a5b386626ae31866638fbd',
+ },
+ 'playlist_mincount': 20,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id)
+ casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id)
+ entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts]
+
+ return self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description'))
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 5e43adc51..8753ee2cf 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -1,23 +1,32 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
+ int_or_none,
float_or_none,
ISO639Utils,
+ determine_ext,
)
-class AdobeTVIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P[^/]+)'
+class AdobeTVBaseIE(InfoExtractor):
+ _API_BASE_URL = 'http://tv.adobe.com/api/v4/'
+
+
+class AdobeTVIE(AdobeTVBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)'
_TEST = {
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
'info_dict': {
- 'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
+ 'id': '10981',
'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
@@ -29,50 +38,106 @@ class AdobeTVIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
- player = self._parse_json(
- self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
- video_id)
-
- title = player.get('title') or self._search_regex(
- r'data-title="([^"]+)"', webpage, 'title')
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- upload_date = unified_strdate(
- self._html_search_meta('datepublished', webpage, 'upload date'))
-
- duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration') or
- self._search_regex(
- r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
- webpage, 'duration', fatal=False))
-
- view_count = str_to_int(self._search_regex(
- r'\s*Views?:\s*([\d,.]+)\s*
',
- webpage, 'view count'))
+ video_data = self._download_json(
+ self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
+ urlname)['data'][0]
formats = [{
- 'url': source['src'],
- 'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
- 'tbr': source.get('bitrate'),
- } for source in player['sources']]
+ 'url': source['url'],
+ 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ } for source in video_data['videos']]
self._sort_formats(formats)
return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'duration': duration,
- 'view_count': view_count,
+ 'id': compat_str(video_data['id']),
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
'formats': formats,
}
+class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
+ def _parse_page_data(self, page_data):
+ return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
+
+ def _extract_playlist_entries(self, url, display_id):
+ page = self._download_json(url, display_id)
+ entries = self._parse_page_data(page['data'])
+ for page_num in range(2, page['paging']['pages'] + 1):
+ entries.extend(self._parse_page_data(
+ self._download_json(url + '&page=%d' % page_num, display_id)['data']))
+ return entries
+
+
+class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
+ 'info_dict': {
+ 'id': '36',
+ 'title': 'The Complete Picture with Julieanne Kost',
+ 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
+ },
+ 'playlist_mincount': 136,
+ }
+
+ def _get_element_url(self, element_data):
+ return element_data['urls'][0]
+
+ def _real_extract(self, url):
+ language, show_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = 'language=%s&show_urlname=%s' % (language, show_urlname)
+
+ show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
+
+ return self.playlist_result(
+ self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
+ compat_str(show_data['id']),
+ show_data['show_name'],
+ show_data['show_description'])
+
+
+class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/channel/development',
+ 'info_dict': {
+ 'id': 'development',
+ },
+ 'playlist_mincount': 96,
+ }
+
+ def _get_element_url(self, element_data):
+ return element_data['url']
+
+ def _real_extract(self, url):
+ language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
+ if category_urlname:
+ query += '&category_urlname=%s' % category_urlname
+
+ return self.playlist_result(
+ self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
+ channel_urlname)
+
+
class AdobeTVVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)'
@@ -91,28 +156,25 @@ class AdobeTVVideoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- player_params = self._parse_json(self._search_regex(
- r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
- video_id)
+ video_data = self._download_json(url + '?format=json', video_id)
formats = [{
+ 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
'url': source['src'],
- 'width': source.get('width'),
- 'height': source.get('height'),
- 'tbr': source.get('bitrate'),
- } for source in player_params['sources']]
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('bitrate')),
+ } for source in video_data['sources']]
+ self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000)
- for source in player_params['sources']]))
+ for source in video_data['sources']]))
subtitles = {}
- for translation in player_params.get('translations', []):
+ for translation in video_data.get('translations', []):
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
if lang_id not in subtitles:
subtitles[lang_id] = []
@@ -124,8 +186,9 @@ class AdobeTVVideoIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
- 'title': player_params['title'],
- 'description': self._og_search_description(webpage),
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data['video'].get('poster'),
'duration': duration,
'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py
new file mode 100644
index 000000000..b0b089dee
--- /dev/null
+++ b/youtube_dl/extractor/audimedia.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class AudiMediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P[^/?#]+)'
+ _TEST = {
+ 'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
+ 'md5': '79a8b71c46d49042609795ab59779b66',
+ 'info_dict': {
+ 'id': '1564',
+ 'ext': 'mp4',
+ 'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
+ 'description': 'md5:60e5d30a78ced725f7b8d34370762941',
+ 'upload_date': '20151124',
+ 'timestamp': 1448354940,
+ 'duration': 74022,
+ 'view_count': int,
+ }
+ }
+ # extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
+ _AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ raw_payload = self._search_regex(r'', embed_page,
+ 'embed vars')
+ info = self._parse_json(embed_vars_json, video_id)
+
+ formats = []
+ for media in info['media']:
+ if media['mediaPurpose'] == 'play':
+ formats.append({
+ 'url': media['uri'],
+ 'height': media['height'],
+ 'width:': media['width'],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': info.get('thumbUri'),
+ 'description': self._og_search_description(webpage),
+ 'duration': int_or_none(info.get('videoLengthInSeconds')),
+ 'age_limit': parse_age_limit(info.get('audienceRating')),
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 5075d131e..c2e8f9b62 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -54,6 +54,7 @@ from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
+from .pladform import PladformIE
class GenericIE(InfoExtractor):
@@ -339,6 +340,7 @@ class GenericIE(InfoExtractor):
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
},
'add_ie': ['Ooyala'],
},
@@ -350,6 +352,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
},
'params': {
'skip_download': True,
@@ -960,8 +963,9 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
- 'description': 'VIDEO: Index/Match versus VLOOKUP.',
+ 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
+ 'duration': 191.933,
},
'params': {
# m3u8 downloads
@@ -1501,7 +1505,7 @@ class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(mobj.group('ec'))
+ return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@ -1509,7 +1513,7 @@ class GenericIE(InfoExtractor):
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
- embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+ embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'
'
- _DESCRIPTION_REGEX = r'Description: ([^<]+)'
-
- _TEST = {
- 'url': 'http://www.movshare.net/video/559e28be54d96',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': '559e28be54d96',
- 'ext': 'flv',
- 'title': 'dissapeared image',
- 'description': 'optical illusion dissapeared image magic illusion',
- }
- }
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index 944096e1c..7c6b7841d 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -1,63 +1,102 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
- remove_end,
parse_duration,
+ int_or_none,
+ xpath_text,
+ xpath_attr,
)
class NBAIE(InfoExtractor):
- _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+ _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P(?:[^/]+/)?video/(?P[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
- 'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
+ 'md5': '9e7729d3010a9c71506fd1248f74e4f4',
'info_dict': {
- 'id': '0021200253-okc-bkn-recap.nba',
- 'ext': 'mp4',
+ 'id': '0021200253-okc-bkn-recap',
+ 'ext': 'flv',
'title': 'Thunder vs. Nets',
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
+ 'timestamp': 1354638466,
+ 'upload_date': '20121204',
},
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
}, {
- 'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
'info_dict': {
- 'id': '0041400301-cle-atl-recap.nba',
+ 'id': '0041400301-cle-atl-recap',
'ext': 'mp4',
- 'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
+ 'title': 'Hawks vs. Cavaliers Game 1',
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
'duration': 228,
- },
- 'params': {
- 'skip_download': True,
+ 'timestamp': 1432134543,
+ 'upload_date': '20150520',
}
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ if path.startswith('nba/'):
+ path = path[3:]
+ video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
+ video_id = xpath_text(video_info, 'slug')
+ title = xpath_text(video_info, 'headline')
+ description = xpath_text(video_info, 'description')
+ duration = parse_duration(xpath_text(video_info, 'length'))
+ timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
- video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+ thumbnails = []
+ for image in video_info.find('images'):
+ thumbnails.append({
+ 'id': image.attrib.get('cut'),
+ 'url': image.text,
+ 'width': int_or_none(image.attrib.get('width')),
+ 'height': int_or_none(image.attrib.get('height')),
+ })
- shortened_video_id = video_id.rpartition('/')[2]
- title = remove_end(
- self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
-
- description = self._og_search_description(webpage)
- duration_str = self._html_search_meta(
- 'duration', webpage, 'duration', default=None)
- if not duration_str:
- duration_str = self._html_search_regex(
- r'Duration:\s*(\d+:\d+)', webpage, 'duration', fatal=False)
- duration = parse_duration(duration_str)
+ formats = []
+ for video_file in video_info.findall('.//file'):
+ video_url = video_file.text
+ if video_url.startswith('/'):
+ continue
+ if video_url.endswith('.m3u8'):
+ m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif video_url.endswith('.f4m'):
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ else:
+ key = video_file.attrib.get('bitrate')
+ format_info = {
+ 'format_id': key,
+ 'url': video_url,
+ }
+ mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
+ if mobj:
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': int_or_none(mobj.group(3)),
+ })
+ formats.append(format_info)
+ self._sort_formats(formats)
return {
- 'id': shortened_video_id,
- 'url': video_url,
+ 'id': video_id,
'title': title,
'description': description,
'duration': duration,
+ 'timestamp': timestamp,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index e683d24c4..4c1eca96f 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -11,6 +11,7 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
lowercase_escape,
+ smuggle_url,
unescapeHTML,
)
@@ -62,12 +63,13 @@ class NBCIE(InfoExtractor):
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+ r'