This commit is contained in:
Gilles Habran 2016-07-05 08:29:37 +02:00
commit f51f4723f3
11 changed files with 132 additions and 49 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.03.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.03.1** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.05**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.07.03.1 [debug] youtube-dl version 2016.07.05
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -103,9 +103,9 @@ which means you can modify it, redistribute it or use it however you like.
(experimental) (experimental)
-6, --force-ipv6 Make all connections via IPv6 -6, --force-ipv6 Make all connections via IPv6
(experimental) (experimental)
--cn-verification-proxy URL Use this proxy to verify the IP address for --geo-verification-proxy URL Use this proxy to verify the IP address for
some Chinese sites. The default proxy some geo-restricted sites. The default
specified by --proxy (or none, if the proxy specified by --proxy (or none, if the
option is not present) is used for the option is not present) is used for the
actual downloading. (experimental) actual downloading. (experimental)

View File

@ -405,6 +405,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url) self.assertEqual(res_url, url)
self.assertEqual(res_data, None) self.assertEqual(res_data, None)
smug_url = smuggle_url(url, {'a': 'b'})
smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
res_url, res_data = unsmuggle_url(smug_smug_url)
self.assertEqual(res_url, url)
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
def test_shell_quote(self): def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""") self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")

View File

@ -1066,6 +1066,7 @@ from .youtube import (
YoutubeSearchDateIE, YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
YoutubeSearchURLIE, YoutubeSearchURLIE,
YoutubeSharedVideoIE,
YoutubeShowIE, YoutubeShowIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,

View File

@ -1295,6 +1295,21 @@ class GenericIE(InfoExtractor):
'uploader': 'cylus cyrus', 'uploader': 'cylus cyrus',
}, },
}, },
{
# video stored on custom kaltura server
'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
'md5': '537617d06e64dfed891fa1593c4b30cc',
'info_dict': {
'id': '0_1iotm5bh',
'ext': 'mp4',
'title': 'Elecciones británicas: 5 lecciones para Rajoy',
'description': 'md5:435a89d68b9760b92ce67ed227055f16',
'uploader_id': 'videos.expansion@el-mundo.net',
'upload_date': '20150429',
'timestamp': 1430303472,
},
'add_ie': ['Kaltura'],
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):

View File

@ -6,7 +6,6 @@ import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse, compat_urlparse,
compat_parse_qs, compat_parse_qs,
) )
@ -15,6 +14,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unsmuggle_url, unsmuggle_url,
smuggle_url,
) )
@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
)(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))? )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
) )
''' '''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?' _SERVICE_URL = 'http://cdnapi.kaltura.com'
_SERVICE_BASE = '/api_v3/index.php'
_TESTS = [ _TESTS = [
{ {
'url': 'kaltura:269692:1_1jc2y3e4', 'url': 'kaltura:269692:1_1jc2y3e4',
@ -88,18 +89,26 @@ class KalturaIE(InfoExtractor):
(?P<q3>["\'])(?P<id>.+?)(?P=q3) (?P<q3>["\'])(?P<id>.+?)(?P=q3)
''', webpage)) ''', webpage))
if mobj: if mobj:
return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict() embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_url = re.search(
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
if service_url:
url = smuggle_url(url, {'service_url': service_url.group(1)})
return url
def _kaltura_api_call(self, video_id, actions, *args, **kwargs): def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0] params = actions[0]
if len(actions) > 1: if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1): for i, a in enumerate(actions[1:], start=1):
for k, v in a.items(): for k, v in a.items():
params['%d:%s' % (i, k)] = v params['%d:%s' % (i, k)] = v
query = compat_urllib_parse_urlencode(params) data = self._download_json(
url = self._API_BASE + query (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
data = self._download_json(url, video_id, *args, **kwargs) video_id, query=params, *args, **kwargs)
status = data if len(actions) == 1 else data[0] status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException': if status.get('objectType') == 'KalturaAPIException':
@ -108,7 +117,7 @@ class KalturaIE(InfoExtractor):
return data return data
def _get_kaltura_signature(self, video_id, partner_id): def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
actions = [{ actions = [{
'apiVersion': '3.1', 'apiVersion': '3.1',
'expiry': 86400, 'expiry': 86400,
@ -118,10 +127,10 @@ class KalturaIE(InfoExtractor):
'widgetId': '_%s' % partner_id, 'widgetId': '_%s' % partner_id,
}] }]
return self._kaltura_api_call( return self._kaltura_api_call(
video_id, actions, note='Downloading Kaltura signature')['ks'] video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
def _get_video_info(self, video_id, partner_id): def _get_video_info(self, video_id, partner_id, service_url=None):
signature = self._get_kaltura_signature(video_id, partner_id) signature = self._get_kaltura_signature(video_id, partner_id, service_url)
actions = [ actions = [
{ {
'action': 'null', 'action': 'null',
@ -144,7 +153,7 @@ class KalturaIE(InfoExtractor):
}, },
] ]
return self._kaltura_api_call( return self._kaltura_api_call(
video_id, actions, note='Downloading video info JSON') video_id, actions, service_url, note='Downloading video info JSON')
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
@ -153,7 +162,7 @@ class KalturaIE(InfoExtractor):
partner_id, entry_id = mobj.group('partner_id', 'id') partner_id, entry_id = mobj.group('partner_id', 'id')
ks = None ks = None
if partner_id and entry_id: if partner_id and entry_id:
info, flavor_assets = self._get_video_info(entry_id, partner_id) info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
else: else:
path, query = mobj.group('path', 'query') path, query = mobj.group('path', 'query')
if not path and not query: if not path and not query:
@ -201,12 +210,17 @@ class KalturaIE(InfoExtractor):
unsigned_url += '?referrer=%s' % referrer unsigned_url += '?referrer=%s' % referrer
return unsigned_url return unsigned_url
data_url = info['dataUrl']
if '/flvclipper/' in data_url:
data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
formats = [] formats = []
for f in flavor_assets: for f in flavor_assets:
# Continue if asset is not ready # Continue if asset is not ready
if f['status'] != 2: if f['status'] != 2:
continue continue
video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id'])) video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
formats.append({ formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f, 'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'), 'ext': f.get('fileExt'),
@ -219,9 +233,12 @@ class KalturaIE(InfoExtractor):
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'url': video_url, 'url': video_url,
}) })
m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp')) if '/playManifest/' in data_url:
m3u8_url = sign_url(data_url.replace(
'format/url', 'format/applehttp'))
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) m3u8_url, entry_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._check_formats(formats, entry_id) self._check_formats(formats, entry_id)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -3,8 +3,8 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
js_to_json, js_to_json,
smuggle_url,
) )
@ -18,13 +18,16 @@ class LA7IE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# 'src' is a plain URL # 'src' is a plain URL
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722', 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '6054674766e7988d3e02f2148ff92180', 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': { 'info_dict': {
'id': 'inccool8-02-10-2015-163722', 'id': 'inccool8-02-10-2015-163722',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Inc.Cool8', 'title': 'Inc.Cool8',
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
'thumbnail': 're:^https?://.*', 'thumbnail': 're:^https?://.*',
'uploader_id': 'kdla7pillole@iltrovatore.it',
'timestamp': 1443814869,
'upload_date': '20151002',
}, },
}, { }, {
# 'src' is a dictionary # 'src' is a dictionary
@ -49,26 +52,14 @@ class LA7IE(InfoExtractor):
self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'), self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
video_id, transform_source=js_to_json) video_id, transform_source=js_to_json)
source = player_data['src']
source_urls = source.values() if isinstance(source, dict) else [source]
formats = []
for source_url in source_urls:
ext = determine_ext(source_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls'))
else:
formats.append({
'url': source_url,
})
self._sort_formats(formats)
return { return {
'_type': 'url_transparent',
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
'service_url': 'http://kdam.iltrovatore.it',
}),
'id': video_id, 'id': video_id,
'title': player_data['title'], 'title': player_data['title'],
'description': self._og_search_description(webpage, default=None), 'description': self._og_search_description(webpage, default=None),
'thumbnail': player_data.get('poster'), 'thumbnail': player_data.get('poster'),
'formats': formats, 'ie_key': 'Kaltura',
} }

View File

@ -67,6 +67,20 @@ class XuiteIE(InfoExtractor):
'categories': ['電玩動漫'], 'categories': ['電玩動漫'],
}, },
'skip': 'Video removed', 'skip': 'Video removed',
}, {
# Video with encoded media id
# from http://forgetfulbc.blogspot.com/2016/06/date.html
'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
'info_dict': {
'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
'ext': 'mp4',
'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
'timestamp': 1466160960,
'upload_date': '20160617',
'uploader': 'B.C. & Lowy',
'uploader_id': '232279340',
},
}, { }, {
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
'only_matching': True, 'only_matching': True,
@ -80,10 +94,9 @@ class XuiteIE(InfoExtractor):
def base64_encode_utf8(data): def base64_encode_utf8(data):
return base64.b64encode(data.encode('utf-8')).decode('utf-8') return base64.b64encode(data.encode('utf-8')).decode('utf-8')
def _extract_flv_config(self, media_id): def _extract_flv_config(self, encoded_media_id):
base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml( flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id, 'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
'flv config') 'flv config')
prop_dict = {} prop_dict = {}
for prop in flv_config.findall('./property'): for prop in flv_config.findall('./property'):
@ -108,9 +121,14 @@ class XuiteIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error_msg), '%s returned error: %s' % (self.IE_NAME, error_msg),
expected=True) expected=True)
encoded_media_id = self._search_regex(
r'attributes\.name\s*=\s*"([^"]+)"', webpage,
'encoded media id', default=None)
if encoded_media_id is None:
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id') r'data-mediaid="(\d+)"', webpage, 'media id')
flv_config = self._extract_flv_config(video_id) encoded_media_id = self.base64_encode_utf8(video_id)
flv_config = self._extract_flv_config(encoded_media_id)
FORMATS = { FORMATS = {
'audio': 'mp3', 'audio': 'mp3',

View File

@ -1730,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
class YoutubeSharedVideoIE(InfoExtractor):
    # Handles youtube.com/shared?ci=<token> links. The ``ci`` token is not
    # the video id itself: the shared page is downloaded and the real id is
    # read from its ``videoId`` meta tag, then handed over to YoutubeIE.
    #
    # The lazy optional group ``(?:.*?&)??`` lets ``ci`` follow other query
    # parameters. Because it is lazy, a URL whose query starts with ``ci=``
    # is matched exactly as before (same captured id).
    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?(?:.*?&)??ci=(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:shared'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        # The id captured from the URL is only the share token; it is used
        # here solely as the identifier for the page download.
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # fatal=True: without the real video id there is nothing to delegate.
        real_video_id = self._html_search_meta(
            'videoId', webpage, 'YouTube video id', fatal=True)

        # Delegate the actual extraction to the regular YouTube extractor.
        return self.url_result(real_video_id, YoutubeIE.ie_key())
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists' IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?: _VALID_URL = r"""(?x)(?:

View File

@ -1444,6 +1444,8 @@ def shell_quote(args):
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    Any data already smuggled into ``url`` is extracted first and merged
    with ``data``; on a key clash the previously smuggled value wins.
    The merged dict is JSON-encoded and appended to the bare URL as a
    ``#__youtubedl_smuggle=...`` fragment.

    The ``data`` dict passed by the caller is not modified.
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a copy: updating ``data`` in place would leak the
    # previously smuggled keys into the caller's dict.
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.07.03.1' __version__ = '2016.07.05'