\s*([^<]+?)\s+\|\s+\d+\s+videos',
- webpage, 'title', default=page_id))
-
- return self.playlist_result(entries, page_id, title)
+ return self.playlist_result(entries, '%s_%s' % (page_id, section))
class VKWallPostIE(VKBaseIE):
@@ -514,15 +545,15 @@ class VKWallPostIE(VKBaseIE):
# public page URL, audio playlist
'url': 'https://vk.com/bs.official?w=wall-23538238_35',
'info_dict': {
- 'id': '23538238_35',
- 'title': 'Black Shadow - Wall post 23538238_35',
+ 'id': '-23538238_35',
+ 'title': 'Black Shadow - Wall post -23538238_35',
'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
},
'playlist': [{
'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
'info_dict': {
'id': '135220665_111806521',
- 'ext': 'mp3',
+ 'ext': 'mp4',
'title': 'Black Shadow - Слепое Верование',
'duration': 370,
'uploader': 'Black Shadow',
@@ -533,18 +564,16 @@ class VKWallPostIE(VKBaseIE):
'md5': '4cc7e804579122b17ea95af7834c9233',
'info_dict': {
'id': '135220665_111802303',
- 'ext': 'mp3',
+ 'ext': 'mp4',
'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
'duration': 423,
'uploader': 'Black Shadow',
'artist': 'Black Shadow',
'track': 'Война - Негасимое Бездны Пламя!',
},
- 'params': {
- 'skip_download': True,
- },
}],
'params': {
+ 'skip_download': True,
'usenetrc': True,
},
'skip': 'Requires vk account credentials',
@@ -553,7 +582,7 @@ class VKWallPostIE(VKBaseIE):
'url': 'https://vk.com/wall85155021_6319',
'info_dict': {
'id': '85155021_6319',
- 'title': 'Sergey Gorbunov - Wall post 85155021_6319',
+ 'title': 'Сергей Горбунов - Wall post 85155021_6319',
},
'playlist_count': 1,
'params': {
@@ -569,58 +598,72 @@ class VKWallPostIE(VKBaseIE):
'url': 'https://m.vk.com/wall-23538238_35',
'only_matching': True,
}]
+ _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
+ _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
+
+ def _decode(self, enc):
+ dec = ''
+ e = n = 0
+ for c in enc:
+ r = self._BASE64_CHARS.index(c)
+ cond = n % 4
+ e = 64 * e + r if cond else r
+ n += 1
+ if cond:
+ dec += chr(255 & e >> (-2 * n & 6))
+ return dec
+
+ def _unmask_url(self, mask_url, vk_id):
+ if 'audio_api_unavailable' in mask_url:
+ extra = mask_url.split('?extra=')[1].split('#')
+ func, base = self._decode(extra[1]).split(chr(11))
+ mask_url = list(self._decode(extra[0]))
+ url_len = len(mask_url)
+ indexes = [None] * url_len
+ index = int(base) ^ vk_id
+ for n in range(url_len - 1, -1, -1):
+ index = (url_len * (n + 1) ^ index + n) % url_len
+ indexes[n] = index
+ for n in range(1, url_len):
+ c = mask_url[n]
+ index = indexes[url_len - 1 - n]
+ mask_url[n] = mask_url[index]
+ mask_url[index] = c
+ mask_url = ''.join(mask_url)
+ return mask_url
def _real_extract(self, url):
post_id = self._match_id(url)
- wall_url = 'https://vk.com/wall%s' % post_id
-
- post_id = remove_start(post_id, '-')
-
- webpage = self._download_webpage(wall_url, post_id)
-
- error = self._html_search_regex(
- r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError('VK said: %s' % error, expected=True)
+ webpage = self._download_payload('wkview', post_id, {
+ 'act': 'show',
+ 'w': 'wall' + post_id,
+ })[1]
description = clean_html(get_element_by_class('wall_post_text', webpage))
uploader = clean_html(get_element_by_class('author', webpage))
- thumbnail = self._og_search_thumbnail(webpage)
entries = []
- audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage)
- if audio_ids:
- al_audio = self._download_webpage(
- 'https://vk.com/al_audio.php', post_id,
- note='Downloading audio info', fatal=False,
- data=urlencode_postdata({
- 'act': 'reload_audio',
- 'al': '1',
- 'ids': ','.join(audio_ids)
- }))
- if al_audio:
- Audio = collections.namedtuple(
- 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration'])
- audios = self._parse_json(
- self._search_regex(
- r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
- post_id, fatal=False, transform_source=unescapeHTML)
- if isinstance(audios, list):
- for audio in audios:
- a = Audio._make(audio[:6])
- entries.append({
- 'id': '%s_%s' % (a.user_id, a.id),
- 'url': a.url,
- 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
- 'thumbnail': thumbnail,
- 'duration': a.duration,
- 'uploader': uploader,
- 'artist': a.artist,
- 'track': a.track,
- })
+ for audio in re.findall(r'data-audio="([^"]+)', webpage):
+ audio = self._parse_json(unescapeHTML(audio), post_id)
+ a = self._AUDIO._make(audio[:16])
+ if not a.url:
+ continue
+ title = unescapeHTML(a.title)
+ performer = unescapeHTML(a.performer)
+ entries.append({
+ 'id': '%s_%s' % (a.owner_id, a.id),
+ 'url': self._unmask_url(a.url, a.ads['vk_id']),
+ 'title': '%s - %s' % (performer, title) if performer else title,
+ 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
+ 'duration': int_or_none(a.duration),
+ 'uploader': uploader,
+ 'artist': performer,
+ 'track': title,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ })
for video in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index c3429f723..f79531e6f 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -6,22 +6,18 @@ import time
import itertools
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_str,
-)
+from .naver import NaverBaseIE
+from ..compat import compat_str
from ..utils import (
- dict_get,
ExtractorError,
- float_or_none,
- int_or_none,
+ merge_dicts,
remove_start,
try_get,
urlencode_postdata,
)
-class VLiveIE(InfoExtractor):
+class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive'
 _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_NETRC_MACHINE = 'vlive'
@@ -34,6 +30,7 @@ class VLiveIE(InfoExtractor):
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
+ 'uploader_id': 'muploader_a',
},
}, {
'url': 'http://www.vlive.tv/video/16937',
@@ -44,6 +41,7 @@ class VLiveIE(InfoExtractor):
'creator': 'EXO',
'view_count': int,
'subtitles': 'mincount:12',
+ 'uploader_id': 'muploader_j',
},
'params': {
'skip_download': True,
@@ -187,45 +185,9 @@ class VLiveIE(InfoExtractor):
'This video is only available for CH+ subscribers')
long_video_id, key = video_info['vid'], video_info['inkey']
- playinfo = self._download_json(
- 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
- % compat_urllib_parse_urlencode({
- 'videoId': long_video_id,
- 'key': key,
- 'ptc': 'http',
- 'doct': 'json', # document type (xml or json)
- 'cpt': 'vtt', # captions type (vtt or ttml)
- }), video_id)
-
- formats = [{
- 'url': vid['source'],
- 'format_id': vid.get('encodingOption', {}).get('name'),
- 'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
- 'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
- 'width': int_or_none(vid.get('encodingOption', {}).get('width')),
- 'height': int_or_none(vid.get('encodingOption', {}).get('height')),
- 'filesize': int_or_none(vid.get('size')),
- } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
- self._sort_formats(formats)
-
- view_count = int_or_none(playinfo.get('meta', {}).get('count'))
-
- subtitles = {}
- for caption in playinfo.get('captions', {}).get('list', []):
- lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
- if lang and caption.get('source'):
- subtitles[lang] = [{
- 'ext': 'vtt',
- 'url': caption['source']}]
-
- info = self._get_common_fields(webpage)
- info.update({
- 'id': video_id,
- 'formats': formats,
- 'view_count': view_count,
- 'subtitles': subtitles,
- })
- return info
+ return merge_dicts(
+ self._get_common_fields(webpage),
+ self._extract_video_info(video_id, long_video_id, key))
def _download_init_page(self, video_id):
return self._download_webpage(
diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py
index 239644340..74d2257e7 100644
--- a/youtube_dl/extractor/vodplatform.py
+++ b/youtube_dl/extractor/vodplatform.py
@@ -6,8 +6,8 @@ from ..utils import unescapeHTML
class VODPlatformIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _TESTS = [{
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
'md5': '1db2b7249ce383d6be96499006e951fc',
@@ -16,7 +16,10 @@ class VODPlatformIE(InfoExtractor):
'ext': 'mp4',
'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
}
- }
+ }, {
+ 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py
index 59e1359c4..a52e40afa 100644
--- a/youtube_dl/extractor/voicerepublic.py
+++ b/youtube_dl/extractor/voicerepublic.py
@@ -1,17 +1,12 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
- sanitized_Request,
+ urljoin,
)
@@ -26,8 +21,7 @@ class VoiceRepublicIE(InfoExtractor):
'ext': 'm4a',
'title': 'Watching the Watchers: Building a Sousveillance State',
'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
- 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
- 'duration': 1800,
+ 'duration': 1556,
'view_count': int,
}
}, {
@@ -38,63 +32,31 @@ class VoiceRepublicIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- req = sanitized_Request(
- compat_urlparse.urljoin(url, '/talks/%s' % display_id))
- # Older versions of Firefox get redirected to an "upgrade browser" page
- req.add_header('User-Agent', 'youtube-dl')
- webpage = self._download_webpage(req, display_id)
+ webpage = self._download_webpage(url, display_id)
if '>Queued for processing, please stand by...<' in webpage:
raise ExtractorError(
'Audio is still queued for processing', expected=True)
- config = self._search_regex(
- r'(?s)return ({.+?});\s*\n', webpage,
- 'data', default=None)
- data = self._parse_json(config, display_id, fatal=False) if config else None
- if data:
- title = data['title']
- description = data.get('teaser')
- talk_id = compat_str(data.get('talk_id') or display_id)
- talk = data['talk']
- duration = int_or_none(talk.get('duration'))
- formats = [{
- 'url': compat_urlparse.urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in talk['links'].items()]
- else:
- title = self._og_search_title(webpage)
- description = self._html_search_regex(
- r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
- webpage, 'description', fatal=False)
- talk_id = self._search_regex(
- [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
- webpage, 'talk id', default=None) or display_id
- duration = None
- player = self._search_regex(
- r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
- formats = [{
- 'url': compat_urlparse.urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)]
+ talk = self._parse_json(self._search_regex(
+ r'initialSnapshot\s*=\s*({.+?});',
+ webpage, 'talk'), display_id)['talk']
+ title = talk['title']
+ formats = [{
+ 'url': urljoin(url, talk_url),
+ 'format_id': format_id,
+ 'ext': determine_ext(talk_url) or format_id,
+ 'vcodec': 'none',
+ } for format_id, talk_url in talk['media_links'].items()]
self._sort_formats(formats)
- thumbnail = self._og_search_thumbnail(webpage)
- view_count = int_or_none(self._search_regex(
- r"class='play-count[^']*'>\s*(\d+) plays",
- webpage, 'play count', fatal=False))
-
return {
- 'id': talk_id,
+ 'id': compat_str(talk.get('id') or display_id),
'display_id': display_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
+ 'description': talk.get('teaser'),
+ 'thumbnail': talk.get('image_url'),
+ 'duration': int_or_none(talk.get('archived_duration')),
+ 'view_count': int_or_none(talk.get('play_count')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py
index 3336e6c15..b7d02fca3 100644
--- a/youtube_dl/extractor/vzaar.py
+++ b/youtube_dl/extractor/vzaar.py
@@ -32,6 +32,18 @@ class VzaarIE(InfoExtractor):
'ext': 'mp3',
'title': 'MP3',
},
+ }, {
+ # hlsAes = true
+ 'url': 'https://view.vzaar.com/11379930/player',
+ 'info_dict': {
+ 'id': '11379930',
+ 'ext': 'mp4',
+ 'title': 'Videoaula',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}, {
# with null videoTitle
'url': 'https://view.vzaar.com/20313539/download',
@@ -58,6 +70,7 @@ class VzaarIE(InfoExtractor):
f = {
'url': source_url,
'format_id': 'http',
+ 'preference': 1,
}
if 'audio' in source_url:
f.update({
@@ -75,13 +88,17 @@ class VzaarIE(InfoExtractor):
video_guid = video_data.get('guid')
usp = video_data.get('usp')
- if isinstance(video_guid, compat_str) and isinstance(usp, dict):
- m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
- % (video_guid, video_id)) + '&'.join(
- '%s=%s' % (k, v) for k, v in usp.items())
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
+ hls_aes = video_data.get('hlsAes')
+ qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
+ url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
+ m3u8_formats = self._extract_m3u8_formats(
+ url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ if hls_aes:
+ for f in m3u8_formats:
+ f['_decryption_key_url'] = url_templ % ('goose', '') + qs
+ formats.extend(m3u8_formats)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
deleted file mode 100644
index ea234e3c5..000000000
--- a/youtube_dl/extractor/wimp.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-
-
-class WimpIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
- _TESTS = [{
- 'url': 'http://www.wimp.com/maru-is-exhausted/',
- 'md5': 'ee21217ffd66d058e8b16be340b74883',
- 'info_dict': {
- 'id': 'maru-is-exhausted',
- 'ext': 'mp4',
- 'title': 'Maru is exhausted.',
- 'description': 'md5:57e099e857c0a4ea312542b684a869b8',
- }
- }, {
- 'url': 'http://www.wimp.com/clowncar/',
- 'md5': '5c31ad862a90dc5b1f023956faec13fe',
- 'info_dict': {
- 'id': 'cG4CEr2aiSg',
- 'ext': 'webm',
- 'title': 'Basset hound clown car...incredible!',
- 'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
- 'upload_date': '20140303',
- 'uploader': 'Gretchen Hoey',
- 'uploader_id': 'gretchenandjeff1',
- },
- 'add_ie': ['Youtube'],
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- youtube_id = self._search_regex(
- (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
- r'data-id=["\']([0-9A-Za-z_-]{11})'),
- webpage, 'video URL', default=None)
- if youtube_id:
- return self.url_result(youtube_id, YoutubeIE.ie_key())
-
- info_dict = self._extract_jwplayer_data(
- webpage, video_id, require_title=False)
-
- info_dict.update({
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- })
-
- return info_dict
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
index fa142b974..168e5e901 100644
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -12,9 +12,8 @@ from ..utils import (
class WistiaIE(InfoExtractor):
- _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)'
- _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
- _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
+ _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
+ _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
_TESTS = [{
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@@ -43,33 +42,35 @@ class WistiaIE(InfoExtractor):
'only_matching': True,
}]
+ # https://wistia.com/support/embed-and-share/video-on-your-website
@staticmethod
def _extract_url(webpage):
- match = re.search(
- r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
- if match:
- return unescapeHTML(match.group('url'))
+ urls = WistiaIE._extract_urls(webpage)
+ return urls[0] if urls else None
- match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
-
- match = re.search(
- r'''(?sx)
- |$)',
- webpage, 'videoplayer applet', default=None)
- if config_json:
- config = self._parse_json(config_json, display_id, fatal=False)
- if config:
- sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
- if sapi and 'query' in sapi:
- info = self._extract_info(display_id, sapi, webpage)
- self._sort_formats(info['formats'])
- return info
-
- items_json = self._search_regex(
- r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
- default=None)
- if items_json is None:
- alias = self._search_regex(
- r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
- if alias is not None:
- alias_info = self._download_json(
- 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
- display_id, 'Downloading alias info')
- video_id = alias_info[0]['id']
- else:
- CONTENT_ID_REGEXES = [
- r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
- r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
- r'"first_videoid"\s*:\s*"([^"]+)"',
- r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
- r']data-uuid=["\']([^"\']+)',
- r']+yahoo://article/view\?.*\buuid=([^&"\']+)',
- r']+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
- ]
- video_id = self._search_regex(
- CONTENT_ID_REGEXES, webpage, 'content ID')
+ url, country, display_id = re.match(self._VALID_URL, url).groups()
+ if not country:
+ country = 'us'
else:
- items = json.loads(items_json)
- info = items['mediaItems']['query']['results']['mediaObj'][0]
- # The 'meta' field is not always in the video webpage, we request it
- # from another page
- video_id = info['id']
- return self._get_info(video_id, display_id, webpage)
+ country = country.split('-')[0]
+ api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
- def _extract_info(self, display_id, query, webpage):
- info = query['query']['results']['mediaObj'][0]
- meta = info.get('meta')
- video_id = info.get('id')
+ for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
+ content = self._download_json(
+ api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
+ display_id, 'Downloading content JSON metadata', fatal=i == 1)
+ if content:
+ item = content['items'][0]
+ break
- if not meta:
- msg = info['status'].get('msg')
- if msg:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, msg), expected=True)
- raise ExtractorError('Unable to extract media object meta')
+ if item.get('type') != 'video':
+ entries = []
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+
+ for e in item.get('body', []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if not iframe_url:
+ continue
+ entries.append(self.url_result(iframe_url))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ video_id = item['uuid']
+ video = self._download_json(
+ api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
+ video_id, 'Downloading video JSON metadata')[0]
+ title = video['title']
+
+ if country == 'malaysia':
+ country = 'my'
+
+ is_live = video.get('live_state') == 'live'
+ fmts = ('m3u8',) if is_live else ('webm', 'mp4')
+
+ urls = []
formats = []
- for s in info['streams']:
- tbr = int_or_none(s.get('bitrate'))
- format_info = {
- 'width': int_or_none(s.get('width')),
- 'height': int_or_none(s.get('height')),
- 'tbr': tbr,
- }
-
- host = s['host']
- path = s['path']
- if host.startswith('rtmp'):
- fmt = 'rtmp'
- format_info.update({
- 'url': host,
- 'play_path': path,
- 'ext': 'flv',
- })
- else:
- if s.get('format') == 'm3u8_playlist':
- fmt = 'hls'
- format_info.update({
- 'protocol': 'm3u8_native',
- 'ext': 'mp4',
- })
- else:
- fmt = format_info['ext'] = determine_ext(path)
- format_url = compat_urlparse.urljoin(host, path)
- format_info['url'] = format_url
- format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
- formats.append(format_info)
-
- closed_captions = self._html_search_regex(
- r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
- default='[]')
-
- cc_json = self._parse_json(closed_captions, video_id, fatal=False)
subtitles = {}
- if cc_json:
- for closed_caption in cc_json:
- lang = closed_caption['lang']
- if lang not in subtitles:
- subtitles[lang] = []
- subtitles[lang].append({
- 'url': closed_caption['url'],
- 'ext': mimetype2ext(closed_caption['content_type']),
+ for fmt in fmts:
+ media_obj = self._download_json(
+ 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+ video_id, 'Downloading %s JSON metadata' % fmt,
+ headers=self.geo_verification_headers(), query={
+ 'format': fmt,
+ 'region': country.upper(),
+ })['query']['results']['mediaObj'][0]
+ msg = media_obj.get('status', {}).get('msg')
+
+ for s in media_obj.get('streams', []):
+ host = s.get('host')
+ path = s.get('path')
+ if not host or not path:
+ continue
+ s_url = host + path
+ if s.get('format') == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ s_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ continue
+ tbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'url': s_url,
+ 'format_id': fmt + ('-%d' % tbr if tbr else ''),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'tbr': tbr,
+ 'fps': int_or_none(s.get('framerate')),
})
+ for cc in media_obj.get('closedcaptions', []):
+ cc_url = cc.get('url')
+ if not cc_url or cc_url in urls:
+ continue
+ urls.append(cc_url)
+ subtitles.setdefault(cc.get('lang') or 'en-US', []).append({
+ 'url': cc_url,
+ 'ext': mimetype2ext(cc.get('content_type')),
+ })
+
+ streaming_url = video.get('streaming_url')
+ if streaming_url and not is_live:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ if not formats and msg == 'geo restricted':
+ self.raise_geo_restricted()
+
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumb in video.get('thumbnails', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb.get('tag'),
+ 'url': thumb.get('url'),
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ series_info = video.get('series_info') or {}
+
return {
'id': video_id,
- 'display_id': display_id,
- 'title': unescapeHTML(meta['title']),
+ 'title': self._live_title(title) if is_live else title,
'formats': formats,
- 'description': clean_html(meta['description']),
- 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
- 'duration': int_or_none(meta.get('duration')),
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(video.get('description')),
+ 'timestamp': parse_iso8601(video.get('publish_time')),
'subtitles': subtitles,
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('view_count')),
+ 'is_live': is_live,
+ 'series': video.get('show_name'),
+ 'season_number': int_or_none(series_info.get('season_number')),
+ 'episode_number': int_or_none(series_info.get('episode_number')),
}
- def _get_info(self, video_id, display_id, webpage):
- region = self._search_regex(
- r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
- webpage, 'region', fatal=False, default='US').upper()
- formats = []
- info = {}
- for fmt in ('webm', 'mp4'):
- query_result = self._download_json(
- 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
- display_id, 'Downloading %s video info' % fmt, query={
- 'protocol': 'http',
- 'region': region,
- 'format': fmt,
- })
- info = self._extract_info(display_id, query_result, webpage)
- formats.extend(info['formats'])
- formats.extend(self._extract_m3u8_formats(
- 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
- info['formats'] = formats
- return info
-
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'
@@ -523,7 +383,7 @@ class YahooGyaOPlayerIE(InfoExtractor):
'id': video_id,
'title': video['title'],
'url': smuggle_url(
- 'http://players.brightcove.net/4235717419001/default_default/index.html?videoId=' + video['videoId'],
+ 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['videoId'],
{'geo_countries': ['JP']}),
'description': video.get('longDescription'),
'ie_key': BrightcoveNewIE.ie_key(),
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index dff69fcb7..88aabd272 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
encodings = self._parse_json(
self._search_regex(
- r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+ r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
default='[]'),
video_id, fatal=False)
for encoding in encodings:
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index d4eccb4b2..e7fca22de 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
- sanitized_Request,
str_to_int,
unescapeHTML,
unified_strdate,
@@ -15,7 +14,7 @@ from ..aes import aes_decrypt_text
class YouPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@@ -57,16 +56,28 @@ class YouPornIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youporn.com/watch/505835',
+ 'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'