This commit is contained in:
Gilles Habran 2016-07-15 08:57:31 +02:00
commit 08ffe16412
27 changed files with 568 additions and 364 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.11** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.13**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.07.11 [debug] youtube-dl version 2016.07.13
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -7,9 +7,6 @@ python:
- "3.4" - "3.4"
- "3.5" - "3.5"
sudo: false sudo: false
install:
- bash ./devscripts/install_srelay.sh
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
script: nosetests test --verbose script: nosetests test --verbose
notifications: notifications:
email: email:

View File

@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read()
with open('download.html.in', 'r', encoding='utf-8') as tmplf: with open('download.html.in', 'r', encoding='utf-8') as tmplf:
template = tmplf.read() template = tmplf.read()
md5sum = hashlib.md5(data).hexdigest()
sha1sum = hashlib.sha1(data).hexdigest()
sha256sum = hashlib.sha256(data).hexdigest() sha256sum = hashlib.sha256(data).hexdigest()
template = template.replace('@PROGRAM_VERSION@', version) template = template.replace('@PROGRAM_VERSION@', version)
template = template.replace('@PROGRAM_URL@', URL) template = template.replace('@PROGRAM_URL@', URL)
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])

View File

@ -1,8 +0,0 @@
#!/bin/bash
# Download the srelay 0.4.8b6 source tarball into ./tmp and build it there.
#
# Abort on the first failing command, on use of an unset variable, or on a
# failing pipeline stage, so a broken download is never unpacked or built.
set -euo pipefail

mkdir -p tmp && cd tmp
# -N: only re-download when the remote file is newer than the local copy.
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
tar zxvf srelay-0.4.8b6.tar.gz
cd srelay-0.4.8b6
./configure
make

View File

@ -568,6 +568,7 @@
- **rtve.es:infantil**: RTVE infantil - **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams - **rtve.es:live**: RTVE.es live streams
- **RTVNH** - **RTVNH**
- **Rudo**
- **RUHD** - **RUHD**
- **RulePorn** - **RulePorn**
- **rutube**: Rutube videos - **rutube**: Rutube videos
@ -794,6 +795,7 @@
- **vine:user** - **vine:user**
- **vk**: VK - **vk**: VK
- **vk:uservideos**: VK - User's Videos - **vk:uservideos**: VK - User's Videos
- **vk:wallpost**
- **vlive** - **vlive**
- **Vodlocker** - **Vodlocker**
- **VoiceRepublic** - **VoiceRepublic**

View File

@ -55,12 +55,11 @@ class BBCCoUkIE(InfoExtractor):
'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
'info_dict': { 'info_dict': {
'id': 'b039d07m', 'id': 'b039d07m',
'ext': 'flv', 'ext': 'mp4',
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
'description': 'The Canadian poet and songwriter reflects on his musical career.', 'description': 'The Canadian poet and songwriter reflects on his musical career.',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
} }
}, },
@ -92,7 +91,7 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', 'skip': 'this episode is not currently available',
}, },
{ {
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
@ -107,7 +106,7 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', 'skip': 'this episode is not currently available',
}, { }, {
'url': 'http://www.bbc.co.uk/programmes/b04v20dw', 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
'info_dict': { 'info_dict': {
@ -127,13 +126,12 @@ class BBCCoUkIE(InfoExtractor):
'note': 'Audio', 'note': 'Audio',
'info_dict': { 'info_dict': {
'id': 'p022h44j', 'id': 'p022h44j',
'ext': 'flv', 'ext': 'mp4',
'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
'duration': 227, 'duration': 227,
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
} }
}, { }, {
@ -141,13 +139,12 @@ class BBCCoUkIE(InfoExtractor):
'note': 'Video', 'note': 'Video',
'info_dict': { 'info_dict': {
'id': 'p025c103', 'id': 'p025c103',
'ext': 'flv', 'ext': 'mp4',
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
'duration': 226, 'duration': 226,
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
} }
}, { }, {
@ -163,7 +160,7 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'geolocation', 'skip': 'this episode is not currently available',
}, { }, {
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition', 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
'info_dict': { 'info_dict': {
@ -177,7 +174,7 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'geolocation', 'skip': 'this episode is not currently available',
}, { }, {
# iptv-all mediaset fails with geolocation however there is no geo restriction # iptv-all mediaset fails with geolocation however there is no geo restriction
# for this programme at all # for this programme at all
@ -192,18 +189,17 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Now it\'s really geo-restricted', 'skip': 'this episode is not currently available on BBC iPlayer Radio',
}, { }, {
# compact player (https://github.com/rg3/youtube-dl/issues/8147) # compact player (https://github.com/rg3/youtube-dl/issues/8147)
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
'info_dict': { 'info_dict': {
'id': 'p028bfkj', 'id': 'p028bfkj',
'ext': 'flv', 'ext': 'mp4',
'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
@ -249,7 +245,7 @@ class BBCCoUkIE(InfoExtractor):
pass pass
elif transfer_format == 'hls': elif transfer_format == 'hls':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
href, programme_id, ext='mp4', entry_protocol='m3u8_native', href, programme_id, 'mp4', 'm3u8_native',
m3u8_id=supplier, fatal=False)) m3u8_id=supplier, fatal=False))
# Direct link # Direct link
else: else:
@ -305,13 +301,14 @@ class BBCCoUkIE(InfoExtractor):
for connection in self._extract_connections(media): for connection in self._extract_connections(media):
conn_formats = self._extract_connection(connection, programme_id) conn_formats = self._extract_connection(connection, programme_id)
for format in conn_formats: for format in conn_formats:
format.update({ if format.get('protocol') != 'm3u8_native':
'width': width, format.update({
'height': height, 'width': width,
'vbr': vbr, 'height': height,
'vcodec': vcodec, 'vbr': vbr,
'filesize': file_size, 'vcodec': vcodec,
}) 'filesize': file_size,
})
if service: if service:
format['format_id'] = '%s_%s' % (service, format['format_id']) format['format_id'] = '%s_%s' % (service, format['format_id'])
formats.extend(conn_formats) formats.extend(conn_formats)

View File

@ -26,6 +26,8 @@ from ..utils import (
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
update_url_query, update_url_query,
clean_html,
mimetype2ext,
) )
@ -544,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor):
formats = [] formats = []
for source in json_data.get('sources', []): for source in json_data.get('sources', []):
container = source.get('container') container = source.get('container')
source_type = source.get('type') ext = mimetype2ext(source.get('type'))
src = source.get('src') src = source.get('src')
if source_type == 'application/x-mpegURL' or container == 'M2TS': if ext == 'ism':
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src: if not src:
continue continue
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
elif source_type == 'application/dash+xml': elif ext == 'mpd':
if not src: if not src:
continue continue
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
@ -567,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor):
'tbr': tbr, 'tbr': tbr,
'filesize': int_or_none(source.get('size')), 'filesize': int_or_none(source.get('size')),
'container': container, 'container': container,
'ext': container.lower(), 'ext': ext or container.lower(),
} }
if width == 0 and height == 0: if width == 0 and height == 0:
f.update({ f.update({
@ -620,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': json_data.get('description'), 'description': clean_html(json_data.get('description')),
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
'duration': float_or_none(json_data.get('duration'), 1000), 'duration': float_or_none(json_data.get('duration'), 1000),
'timestamp': parse_iso8601(json_data.get('published_at')), 'timestamp': parse_iso8601(json_data.get('published_at')),

View File

@ -1207,6 +1207,7 @@ class InfoExtractor(object):
'url': format_url(line.strip()), 'url': format_url(line.strip()),
'tbr': tbr, 'tbr': tbr,
'ext': ext, 'ext': ext,
'fps': float_or_none(last_info.get('FRAME-RATE')),
'protocol': entry_protocol, 'protocol': entry_protocol,
'preference': preference, 'preference': preference,
} }
@ -1215,24 +1216,17 @@ class InfoExtractor(object):
width_str, height_str = resolution.split('x') width_str, height_str = resolution.split('x')
f['width'] = int(width_str) f['width'] = int(width_str)
f['height'] = int(height_str) f['height'] = int(height_str)
codecs = last_info.get('CODECS') # Unified Streaming Platform
if codecs: mobj = re.search(
vcodec, acodec = [None] * 2 r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
va_codecs = codecs.split(',') if mobj:
if len(va_codecs) == 1: abr, vbr = mobj.groups()
# Audio only entries usually come with single codec and abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
# no resolution. For more robustness we also check it to
# be mp4 audio.
if not resolution and va_codecs[0].startswith('mp4a'):
vcodec, acodec = 'none', va_codecs[0]
else:
vcodec = va_codecs[0]
else:
vcodec, acodec = va_codecs[:2]
f.update({ f.update({
'acodec': acodec, 'vbr': vbr,
'vcodec': vcodec, 'abr': abr,
}) })
f.update(parse_codecs(last_info.get('CODECS')))
if last_media is not None: if last_media is not None:
f['m3u8_media'] = last_media f['m3u8_media'] = last_media
last_media = None last_media = None

View File

@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor):
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
'info_dict': { 'info_dict': {
'id': 'judiciary031715', 'id': 'judiciary031715',
'ext': 'flv', 'ext': 'mp4',
'title': 'Immigration Reforms Needed to Protect Skilled American Workers', 'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
},
'params': {
'skip_download': True, # m3u8 downloads
} }
}] }]

View File

@ -4,78 +4,47 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
clean_html,
)
class DBTVIE(InfoExtractor): class DBTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?' _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', 'md5': '2e24f67936517b143a234b4cadf792ec',
'info_dict': { 'info_dict': {
'id': '33100', 'id': '3649835190001',
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
'thumbnail': 're:https?://.*\.jpg$', 'thumbnail': 're:https?://.*\.jpg',
'timestamp': 1404039863.438, 'timestamp': 1404039863,
'upload_date': '20140629', 'upload_date': '20140629',
'duration': 69.544, 'duration': 69.544,
'view_count': int, 'uploader_id': '1027729757001',
'categories': list, },
} 'add_ie': ['BrightcoveNew']
}, { }, {
'url': 'http://dbtv.no/3649835190001', 'url': 'http://dbtv.no/3649835190001',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.dbtv.no/lazyplayer/4631135248001', 'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://dbtv.no/vice/5000634109001',
'only_matching': True,
}, {
'url': 'http://dbtv.no/filmtrailer/3359293614001',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id, display_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
data = self._download_json(
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
video = data['playlist'][0]
formats = [{
'url': f['URL'],
'vcodec': f.get('container'),
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'vbr': float_or_none(f.get('rate'), 1000),
'filesize': int_or_none(f.get('size')),
} for f in video['renditions'] if 'URL' in f]
if not formats:
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
if url_key in video:
formats.append({
'url': video[url_key],
'format_id': format_id,
})
self._sort_formats(formats)
return { return {
'id': compat_str(video['id']), '_type': 'url_transparent',
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': video['title'], 'ie_key': 'BrightcoveNew',
'description': clean_html(video['desc']),
'thumbnail': video.get('splash') or video.get('thumb'),
'timestamp': float_or_none(video.get('publishedAt'), 1000),
'duration': float_or_none(video.get('length'), 1000),
'view_count': int_or_none(video.get('views')),
'categories': video.get('tags'),
'formats': formats,
} }

View File

@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Waidmannsheil', 'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': '3sat', 'uploader': 'SCHWEIZWEIT',
'uploader_id': '100000210',
'upload_date': '20140913' 'upload_date': '20140913'
},
'params': {
'skip_download': True, # m3u8 downloads
} }
}, },
{ {

View File

@ -6,12 +6,13 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
NO_DEFAULT,
) )
class EllenTVIE(InfoExtractor): class EllenTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
_TEST = { _TESTS = [{
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
'md5': '4294cf98bc165f218aaa0b89e0fd8042', 'md5': '4294cf98bc165f218aaa0b89e0fd8042',
'info_dict': { 'info_dict': {
@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
'timestamp': 1428035648, 'timestamp': 1428035648,
'upload_date': '20150403', 'upload_date': '20150403',
'uploader_id': 'batchUser', 'uploader_id': 'batchUser',
} },
} }, {
# not available via http://widgets.ellentube.com/
'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
'info_dict': {
'id': '1_szkgu2m2',
'ext': 'flv',
'title': "Ellen's Amazingly Talented Audience",
'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
'timestamp': 1255140900,
'upload_date': '20091010',
'uploader_id': 'ellenkaltura@gmail.com',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
'http://widgets.ellentube.com/videos/%s' % video_id,
video_id)
partner_id = self._search_regex( for num, url_ in enumerate(URLS, 1):
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id') webpage = self._download_webpage(
url_, video_id, fatal=num == len(URLS))
kaltura_id = self._search_regex( default = NO_DEFAULT if num == len(URLS) else None
[r'id="kaltura_player_([^"]+)"',
r"_wb_entry_id\s*:\s*'([^']+)", partner_id = self._search_regex(
r'data-kaltura-entry-id="([^"]+)'], r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
webpage, 'kaltura id') default=default)
kaltura_id = self._search_regex(
[r'id="kaltura_player_([^"]+)"',
r"_wb_entry_id\s*:\s*'([^']+)",
r'data-kaltura-entry-id="([^"]+)'],
webpage, 'kaltura id', default=default)
if partner_id and kaltura_id:
break
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')

View File

@ -537,6 +537,7 @@ from .nick import (
from .niconico import NiconicoIE, NiconicoPlaylistIE from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import NineCNineMediaIE from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .noco import NocoIE from .noco import NocoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE from .nosvideo import NosVideoIE
@ -689,7 +690,7 @@ from .rtlnl import RtlNlIE
from .rtl2 import RTL2IE from .rtl2 import RTL2IE
from .rtp import RTPIE from .rtp import RTPIE
from .rts import RTSIE from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
from .rtvnh import RTVNHIE from .rtvnh import RTVNHIE
from .rudo import RudoIE from .rudo import RudoIE
from .ruhd import RUHDIE from .ruhd import RUHDIE
@ -989,6 +990,7 @@ from .viki import (
from .vk import ( from .vk import (
VKIE, VKIE,
VKUserVideosIE, VKUserVideosIE,
VKWallPostIE,
) )
from .vlive import VLiveIE from .vlive import VLiveIE
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE

View File

@ -28,10 +28,13 @@ class GameSpotIE(OnceIE):
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
'info_dict': { 'info_dict': {
'id': 'gs-2300-6424837', 'id': 'gs-2300-6424837',
'ext': 'flv', 'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing', 'title': 'Now Playing - The Witcher 3: Wild Hunt',
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
}, },
'params': {
'skip_download': True, # m3u8 downloads
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
float_or_none,
ExtractorError,
)
class NineNowIE(InfoExtractor):
    """Extractor for 9now.com.au clip and episode pages.

    The page state is read from the JSON assigned to ``window.__data`` in the
    webpage. The result is a ``url_transparent`` entry that delegates the
    actual media extraction to the ``BrightcoveNew`` extractor.
    """

    IE_NAME = '9now.com.au'
    _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
    _TESTS = [{
        # clip
        'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
        'md5': '17cf47d63ec9323e562c9957a968b565',
        'info_dict': {
            'id': '16801',
            'ext': 'mp4',
            'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike',
            'description': 'Is a boycott of the NAB Cup "on the table"?',
            'uploader_id': '4460760524001',
            'upload_date': '20160713',
            'timestamp': 1468421266,
        },
        'skip': 'Only available in Australia',
    }, {
        # episode
        'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19',
        'only_matching': True,
    }, {
        # DRM protected
        'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1',
        'only_matching': True,
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the page at *url*.

        Raises ExtractorError when the page carries neither episode nor clip
        data, or when the video is flagged as DRM protected.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        page_data = self._parse_json(self._search_regex(
            r'window\.__data\s*=\s*({.*?});', webpage,
            'page data'), display_id)

        # The payload lives under episode.episode or clip.clip; episode is
        # tried first. ``or {}`` also covers keys that are present but null.
        common_data = None
        for kind in ('episode', 'clip'):
            common_data = (page_data.get(kind) or {}).get(kind)
            if common_data:
                break
        if not common_data:
            # Fail with a readable message instead of a TypeError from
            # subscripting None below.
            raise ExtractorError('Unable to find episode or clip data')

        video_data = common_data['video']

        if video_data.get('drm'):
            raise ExtractorError('This video is DRM protected.', expected=True)

        # Prefer the numeric Brightcove id; otherwise address the video by
        # its reference id using the "ref:" prefix.
        brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
        video_id = compat_str(video_data.get('id') or brightcove_id)
        title = common_data['name']

        # Thumbnails are optional, so tolerate a missing or null image/sizes.
        sizes = (common_data.get('image') or {}).get('sizes') or {}
        thumbnails = [{
            'id': thumbnail_id,
            'url': thumbnail_url,
            # Width is parsed from the size key minus its first character;
            # presumably keys look like "w1920" -- TODO confirm.
            'width': int_or_none(thumbnail_id[1:])
        } for thumbnail_id, thumbnail_url in sizes.items()]

        return {
            '_type': 'url_transparent',
            'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            'id': video_id,
            'title': title,
            'description': common_data.get('description'),
            # The second argument is the scale divisor passed to float_or_none.
            'duration': float_or_none(video_data.get('duration'), 1000),
            'thumbnails': thumbnails,
            'ie_key': 'BrightcoveNew',
        }

View File

@ -40,16 +40,16 @@ class ORFTVthekIE(InfoExtractor):
'skip': 'Blocked outside of Austria / Germany', 'skip': 'Blocked outside of Austria / Germany',
}, { }, {
'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
'playlist': [{ 'info_dict': {
'md5': '68f543909aea49d621dfc7703a11cfaf', 'id': '7982259',
'info_dict': { 'ext': 'mp4',
'id': '7982259', 'title': 'Best of Ingrid Thurnher',
'ext': 'mp4', 'upload_date': '20140527',
'title': 'Best of Ingrid Thurnher', 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
'upload_date': '20140527', },
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', 'params': {
} 'skip_download': True, # rtsp downloads
}], },
'_skip': 'Blocked outside of Austria / Germany', '_skip': 'Blocked outside of Austria / Germany',
}] }]

View File

@ -113,9 +113,7 @@ class RTVEALaCartaIE(InfoExtractor):
png = self._download_webpage(png_request, video_id, 'Downloading url information') png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png) video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'): if not video_url.endswith('.f4m'):
video_url = video_url.replace( video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
'resources/', 'auth/resources/'
).replace('.net.rtve', '.multimedia.cdn.rtve')
subtitles = None subtitles = None
if info.get('sbtFile') is not None: if info.get('sbtFile') is not None:
@ -222,3 +220,34 @@ class RTVELiveIE(InfoExtractor):
'formats': formats, 'formats': formats,
'is_live': True, 'is_live': True,
} }
class RTVETelevisionIE(InfoExtractor):
    """Extractor for rtve.es "television" article pages.

    These pages are resolved by locating the embedded "alacarta" video URL in
    the webpage and delegating to RTVEALaCartaIE.
    """

    IE_NAME = 'rtve.es:television'
    # The dot before "shtml" is escaped so it only matches a literal ".".
    _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+)\.shtml'

    _TEST = {
        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
        'info_dict': {
            'id': '3069778',
            'ext': 'mp4',
            'title': 'Documentos TV - La revolución del móvil',
            'duration': 3496.948,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the page's alacarta video.

        Raises ExtractorError (expected) when no alacarta URL is found.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # The URL is searched for inside HTML-escaped (&quot;) markup that
        # follows the data-location="alacarta_videos" attribute.
        alacarta_url = self._search_regex(
            r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&',
            webpage, 'alacarta url', default=None)
        if alacarta_url is None:
            raise ExtractorError(
                'The webpage doesn\'t contain any video', expected=True)

        return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())

View File

@ -2,11 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
str_or_none,
) )
@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor):
'only_matching': True 'only_matching': True
}] }]
def _handle_error(self, response): def _call_api(self, path, video_id, note):
if not isinstance(response, dict): data = self._download_json(
return 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
error = response.get('error') 'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}).get('data', {})
error = data.get('error')
if error: if error:
raise ExtractorError( raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
expected=True) expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'): return data
response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
self._handle_error(response)
return response
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) player = self._call_api(
'Content/Episode/%s' % video_id,
api_vars = { video_id, 'Downloading player JSON')
'id': video_id,
'type': 'player',
'url': 'http://api.shahid.net/api/v1_1',
'playerType': 'episode',
}
flashvars = self._search_regex(
r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
if flashvars:
for key in api_vars.keys():
value = self._search_regex(
r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
flashvars, 'type', default=None, group='value')
if value:
api_vars[key] = value
player = self._download_json(
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
% (video_id, api_vars['type']), video_id, 'Downloading player JSON')
if player.get('drm'): if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)
video = self._download_json( video = self._call_api(
'%s/%s/%s?%s' % ( 'episode/%s' % video_id, video_id,
api_vars['url'], api_vars['playerType'], api_vars['id'], 'Downloading video JSON')['episode']
compat_urllib_parse_urlencode({
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
})),
video_id, 'Downloading video JSON')
video = video[api_vars['playerType']]
title = video['title'] title = video['title']
description = video.get('description')
thumbnail = video.get('thumbnailUrl')
duration = int_or_none(video.get('duration'))
timestamp = parse_iso8601(video.get('referenceDate'))
categories = [ categories = [
category['name'] category['name']
for category in video.get('genres', []) if 'name' in category] for category in video.get('genres', []) if 'name' in category]
@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': video.get('description'),
'thumbnail': thumbnail, 'thumbnail': video.get('thumbnailUrl'),
'duration': duration, 'duration': int_or_none(video.get('duration')),
'timestamp': timestamp, 'timestamp': parse_iso8601(video.get('referenceDate')),
'categories': categories, 'categories': categories,
'series': video.get('showTitle') or video.get('showName'),
'season': video.get('seasonTitle'),
'season_number': int_or_none(video.get('seasonNumber')),
'season_id': str_or_none(video.get('seasonId')),
'episode_number': int_or_none(video.get('number')),
'episode_id': video_id,
'formats': formats, 'formats': formats,
} }

View File

@ -1,46 +1,56 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re from .theplatform import ThePlatformIE
from ..utils import (
from .common import InfoExtractor update_url_query,
smuggle_url,
)
class SyfyIE(InfoExtractor): class SyfyIE(ThePlatformIE):
_VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))' _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
'info_dict': { 'info_dict': {
'id': 'NmqMrGnXvmO1', 'id': '2968097',
'ext': 'flv', 'ext': 'mp4',
'title': 'George Lucas has Advice for his Daughter', 'title': 'The Internet Ruined My Life: Season 1 Trailer',
'description': 'Listen to what insights George Lucas give his daughter Amanda.', 'description': 'One tweet, one post, one click, can destroy everything.',
'uploader': 'NBCU-MPAT',
'upload_date': '20170113',
'timestamp': 1484345640,
},
'params': {
# m3u8 download
'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.syfy.com/wilwheaton',
'md5': '94dfa54ee3ccb63295b276da08c415f6',
'info_dict': {
'id': '4yoffOOXC767',
'ext': 'flv',
'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
},
'add_ie': ['ThePlatform'],
'skip': 'Blocked outside the US',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
video_name = mobj.group('video_name') webpage = self._download_webpage(url, display_id)
if video_name: syfy_mpx = list(self._parse_json(self._search_regex(
generic_webpage = self._download_webpage(url, video_name) r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'),
video_id = self._search_regex( display_id)['syfy']['syfy_mpx'].values())[0]
r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">', video_id = syfy_mpx['mpxGUID']
generic_webpage, 'video ID') title = syfy_mpx['episodeTitle']
url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % ( query = {
video_name, video_name, video_id) 'mbr': 'true',
else: 'manifest': 'm3u',
video_id = mobj.group('id') }
webpage = self._download_webpage(url, video_id) if syfy_mpx.get('entitlement') == 'auth':
return self.url_result(self._og_search_video_url(webpage)) resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'syfy', resource)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(update_url_query(
self._proto_relative_url(syfy_mpx['releaseURL']), query),
{'force_smil_url': True}),
'title': title,
'id': video_id,
'display_id': display_id,
}

View File

@ -24,16 +24,20 @@ class ThreeQSDNIE(InfoExtractor):
'title': '0280d6b9-1215-11e6-b427-0cc47a188158', 'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
'is_live': False, 'is_live': False,
}, },
'expected_warnings': ['Failed to download MPD manifest'], 'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
}, { }, {
# live video stream # live video stream
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
'info_dict': { 'info_dict': {
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
'ext': 'mp4', 'ext': 'mp4',
'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', 'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': False, 'is_live': True,
}, },
'params': {
'skip_download': True, # m3u8 downloads
},
'expected_warnings': ['Failed to download MPD manifest'],
}, { }, {
# live audio stream # live audio stream
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
@ -114,7 +118,7 @@ class ThreeQSDNIE(InfoExtractor):
'vcodec': 'none' if stream_type == 'audio' else None, 'vcodec': 'none' if stream_type == 'audio' else None,
}) })
for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
f = self._parse_json( f = self._parse_json(
item_js, video_id, transform_source=js_to_json, fatal=False) item_js, video_id, transform_source=js_to_json, fatal=False)
if not f: if not f:

View File

@ -5,31 +5,27 @@ from .common import InfoExtractor
class TMZIE(InfoExtractor): class TMZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?' _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
_TEST = { _TESTS = [{
'url': 'http://www.tmz.com/videos/0_okj015ty/', 'url': 'http://www.tmz.com/videos/0_okj015ty/',
'md5': '791204e3bf790b1426cb2db0706184c0', 'md5': '4d22a51ef205b6c06395d8394f72d560',
'info_dict': { 'info_dict': {
'id': '0_okj015ty', 'id': '0_okj015ty',
'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', 'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?', 'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*', 'timestamp': 1394747163,
'uploader_id': 'batchUser',
'upload_date': '20140313',
} }
} }, {
'url': 'http://www.tmz.com/videos/0-cegprt2p/',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url).replace('-', '_')
webpage = self._download_webpage(url, video_id) return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id)
return {
'id': video_id,
'url': self._html_search_meta('VideoURL', webpage, fatal=True),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._html_search_meta('ThumbURL', webpage),
}
class TMZArticleIE(InfoExtractor): class TMZArticleIE(InfoExtractor):

View File

@ -8,43 +8,36 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
parse_iso8601, parse_iso8601,
qualities, qualities,
determine_ext,
update_url_query,
int_or_none,
) )
class TVPlayIE(InfoExtractor): class TVPlayIE(InfoExtractor):
IE_DESC = 'TV3Play and related services' IE_DESC = 'TV3Play and related services'
_VALID_URL = r'''(?x)https?://(?:www\.)? _VALID_URL = r'''(?x)https?://(?:www\.)?
(?:tvplay\.lv/parraides| (?:tvplay(?:\.skaties)?\.lv/parraides|
tv3play\.lt/programos| (?:tv3play|play\.tv3)\.lt/programos|
play\.tv3\.lt/programos| tv3play(?:\.tv3)?\.ee/sisu|
tv3play\.ee/sisu| tv(?:3|6|8|10)play\.se/program|
tv3play\.se/program| (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer|
tv6play\.se/program|
tv8play\.se/program|
tv10play\.se/program|
tv3play\.no/programmer|
viasat4play\.no/programmer|
tv6play\.no/programmer|
tv3play\.dk/programmer|
play\.novatv\.bg/programi play\.novatv\.bg/programi
)/[^/]+/(?P<id>\d+) )/[^/]+/(?P<id>\d+)
''' '''
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
'md5': 'a1612fe0849455423ad8718fe049be21',
'info_dict': { 'info_dict': {
'id': '418113', 'id': '418113',
'ext': 'flv', 'ext': 'mp4',
'title': 'Kādi ir īri? - Viņas melo labāk', 'title': 'Kādi ir īri? - Viņas melo labāk',
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.', 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
'duration': 25, 'duration': 25,
'timestamp': 1406097056, 'timestamp': 1406097056,
'upload_date': '20140723', 'upload_date': '20140723',
}, },
'params': {
# rtmp download
'skip_download': True,
},
}, },
{ {
'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true', 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
@ -82,7 +75,7 @@ class TVPlayIE(InfoExtractor):
'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true', 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
'info_dict': { 'info_dict': {
'id': '395385', 'id': '395385',
'ext': 'flv', 'ext': 'mp4',
'title': 'Husräddarna S02E07', 'title': 'Husräddarna S02E07',
'description': 'md5:f210c6c89f42d4fc39faa551be813777', 'description': 'md5:f210c6c89f42d4fc39faa551be813777',
'duration': 2574, 'duration': 2574,
@ -90,7 +83,6 @@ class TVPlayIE(InfoExtractor):
'upload_date': '20140520', 'upload_date': '20140520',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@ -98,7 +90,7 @@ class TVPlayIE(InfoExtractor):
'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true', 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
'info_dict': { 'info_dict': {
'id': '266636', 'id': '266636',
'ext': 'flv', 'ext': 'mp4',
'title': 'Den sista dokusåpan S01E08', 'title': 'Den sista dokusåpan S01E08',
'description': 'md5:295be39c872520221b933830f660b110', 'description': 'md5:295be39c872520221b933830f660b110',
'duration': 1492, 'duration': 1492,
@ -107,7 +99,6 @@ class TVPlayIE(InfoExtractor):
'age_limit': 18, 'age_limit': 18,
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@ -115,7 +106,7 @@ class TVPlayIE(InfoExtractor):
'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true', 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
'info_dict': { 'info_dict': {
'id': '282756', 'id': '282756',
'ext': 'flv', 'ext': 'mp4',
'title': 'Antikjakten S01E10', 'title': 'Antikjakten S01E10',
'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8', 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
'duration': 2646, 'duration': 2646,
@ -123,7 +114,6 @@ class TVPlayIE(InfoExtractor):
'upload_date': '20120925', 'upload_date': '20120925',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@ -131,7 +121,7 @@ class TVPlayIE(InfoExtractor):
'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true', 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
'info_dict': { 'info_dict': {
'id': '230898', 'id': '230898',
'ext': 'flv', 'ext': 'mp4',
'title': 'Anna Anka søker assistent - Ep. 8', 'title': 'Anna Anka søker assistent - Ep. 8',
'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474', 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
'duration': 2656, 'duration': 2656,
@ -139,7 +129,6 @@ class TVPlayIE(InfoExtractor):
'upload_date': '20100628', 'upload_date': '20100628',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@ -147,7 +136,7 @@ class TVPlayIE(InfoExtractor):
'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true', 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
'info_dict': { 'info_dict': {
'id': '21873', 'id': '21873',
'ext': 'flv', 'ext': 'mp4',
'title': 'Budbringerne program 10', 'title': 'Budbringerne program 10',
'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d', 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
'duration': 1297, 'duration': 1297,
@ -155,7 +144,6 @@ class TVPlayIE(InfoExtractor):
'upload_date': '20090929', 'upload_date': '20090929',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@ -163,7 +151,7 @@ class TVPlayIE(InfoExtractor):
'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true', 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
'info_dict': { 'info_dict': {
'id': '361883', 'id': '361883',
'ext': 'flv', 'ext': 'mp4',
'title': 'Hotelinspektør Alex Polizzi - Ep. 10', 'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81', 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
'duration': 2594, 'duration': 2594,
@ -171,7 +159,6 @@ class TVPlayIE(InfoExtractor):
'upload_date': '20140224', 'upload_date': '20140224',
}, },
'params': { 'params': {
# rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
@ -191,6 +178,14 @@ class TVPlayIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
'only_matching': True,
},
{
'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
'only_matching': True,
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -199,7 +194,9 @@ class TVPlayIE(InfoExtractor):
video = self._download_json( video = self._download_json(
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
if video['is_geo_blocked']: title = video['title']
if video.get('is_geo_blocked'):
self.report_warning( self.report_warning(
'This content might not be available in your country due to copyright reasons') 'This content might not be available in your country due to copyright reasons')
@ -208,42 +205,50 @@ class TVPlayIE(InfoExtractor):
quality = qualities(['hls', 'medium', 'high']) quality = qualities(['hls', 'medium', 'high'])
formats = [] formats = []
for format_id, video_url in streams['streams'].items(): for format_id, video_url in streams.get('streams', {}).items():
if not video_url or not isinstance(video_url, compat_str): if not video_url or not isinstance(video_url, compat_str):
continue continue
fmt = { ext = determine_ext(video_url)
'format_id': format_id, if ext == 'f4m':
'preference': quality(format_id),
}
if video_url.startswith('rtmp'):
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
if not m:
continue
fmt.update({
'ext': 'flv',
'url': m.group('url'),
'app': m.group('app'),
'play_path': m.group('playpath'),
})
elif video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id)) update_url_query(video_url, {
continue 'hdcore': '3.5.0',
'plugin': 'aasp-3.5.0.151.81'
}), video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else: else:
fmt.update({ fmt = {
'url': video_url, 'format_id': format_id,
}) 'quality': quality(format_id),
formats.append(fmt) 'ext': ext,
}
if video_url.startswith('rtmp'):
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
if not m:
continue
fmt.update({
'ext': 'flv',
'url': m.group('url'),
'app': m.group('app'),
'play_path': m.group('playpath'),
})
else:
fmt.update({
'url': video_url,
})
formats.append(fmt)
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': video['title'], 'title': title,
'description': video['description'], 'description': video.get('description'),
'duration': video['duration'], 'duration': int_or_none(video.get('duration')),
'timestamp': parse_iso8601(video['created_at']), 'timestamp': parse_iso8601(video.get('created_at')),
'view_count': video['views']['total'], 'view_count': int_or_none(video.get('views', {}).get('total')),
'age_limit': video.get('age_limit', 0), 'age_limit': int_or_none(video.get('age_limit', 0)),
'formats': formats, 'formats': formats,
} }

View File

@ -6,11 +6,18 @@ import json
import sys import sys
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
get_element_by_class,
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration,
remove_start,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -20,7 +27,55 @@ from .vimeo import VimeoIE
from .pladform import PladformIE from .pladform import PladformIE
class VKIE(InfoExtractor): class VKBaseIE(InfoExtractor):
_NETRC_MACHINE = 'vk'
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page')
login_form = self._hidden_inputs(login_page)
login_form.update({
'email': username.encode('cp1251'),
'pass': password.encode('cp1251'),
})
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
# and expects the first one to be set rather than second (see
# https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
# As of RFC6265 the newer one cookie should be set into cookie store
# what actually happens.
# We will workaround this VK issue by resetting the remixlhk cookie to
# the first one manually.
cookies = url_handle.headers.get('Set-Cookie')
if cookies:
if sys.version_info[0] >= 3:
cookies = cookies.encode('iso-8859-1')
cookies = cookies.decode('utf-8')
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
if remixlhk:
value, domain = remixlhk.groups()
self._set_cookie(domain, 'remixlhk', value)
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
note='Logging in as %s' % username,
data=urlencode_postdata(login_form))
if re.search(r'onLoginFailed', login_page):
raise ExtractorError(
'Unable to login, incorrect username and/or password', expected=True)
def _real_initialize(self):
self._login()
class VKIE(VKBaseIE):
IE_NAME = 'vk' IE_NAME = 'vk'
IE_DESC = 'VK' IE_DESC = 'VK'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -38,8 +93,6 @@ class VKIE(InfoExtractor):
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))? (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
) )
''' '''
_NETRC_MACHINE = 'vk'
_TESTS = [ _TESTS = [
{ {
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
@ -189,49 +242,6 @@ class VKIE(InfoExtractor):
} }
] ]
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page')
login_form = self._hidden_inputs(login_page)
login_form.update({
'email': username.encode('cp1251'),
'pass': password.encode('cp1251'),
})
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
# and expects the first one to be set rather than second (see
# https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
# As of RFC6265 the newer one cookie should be set into cookie store
# what actually happens.
# We will workaround this VK issue by resetting the remixlhk cookie to
# the first one manually.
cookies = url_handle.headers.get('Set-Cookie')
if sys.version_info[0] >= 3:
cookies = cookies.encode('iso-8859-1')
cookies = cookies.decode('utf-8')
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
if remixlhk:
value, domain = remixlhk.groups()
self._set_cookie(domain, 'remixlhk', value)
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
note='Logging in as %s' % username,
data=urlencode_postdata(login_form))
if re.search(r'onLoginFailed', login_page):
raise ExtractorError(
'Unable to login, incorrect username and/or password', expected=True)
def _real_initialize(self):
self._login()
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
@ -355,7 +365,7 @@ class VKIE(InfoExtractor):
} }
class VKUserVideosIE(InfoExtractor): class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos' IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos" IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
@ -396,3 +406,121 @@ class VKUserVideosIE(InfoExtractor):
webpage, 'title', default=page_id)) webpage, 'title', default=page_id))
return self.playlist_result(entries, page_id, title) return self.playlist_result(entries, page_id, title)
class VKWallPostIE(VKBaseIE):
IE_NAME = 'vk:wallpost'
_VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
_TESTS = [{
# public page URL, audio playlist
'url': 'https://vk.com/bs.official?w=wall-23538238_35',
'info_dict': {
'id': '23538238_35',
'title': 'Black Shadow - Wall post 23538238_35',
'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
},
'playlist': [{
'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
'info_dict': {
'id': '135220665_111806521',
'ext': 'mp3',
'title': 'Black Shadow - Слепое Верование',
'duration': 370,
'uploader': 'Black Shadow',
'artist': 'Black Shadow',
'track': 'Слепое Верование',
},
}, {
'md5': '4cc7e804579122b17ea95af7834c9233',
'info_dict': {
'id': '135220665_111802303',
'ext': 'mp3',
'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
'duration': 423,
'uploader': 'Black Shadow',
'artist': 'Black Shadow',
'track': 'Война - Негасимое Бездны Пламя!',
},
'params': {
'skip_download': True,
},
}],
'skip': 'Requires vk account credentials',
}, {
# single YouTube embed, no leading -
'url': 'https://vk.com/wall85155021_6319',
'info_dict': {
'id': '85155021_6319',
'title': 'Sergey Gorbunov - Wall post 85155021_6319',
},
'playlist_count': 1,
'skip': 'Requires vk account credentials',
}, {
# wall page URL
'url': 'https://vk.com/wall-23538238_35',
'only_matching': True,
}, {
# mobile wall page URL
'url': 'https://m.vk.com/wall-23538238_35',
'only_matching': True,
}]
def _real_extract(self, url):
post_id = self._match_id(url)
wall_url = 'https://vk.com/wall%s' % post_id
post_id = remove_start(post_id, '-')
webpage = self._download_webpage(wall_url, post_id)
error = self._html_search_regex(
r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)',
webpage, 'error', default=None)
if error:
raise ExtractorError('VK said: %s' % error, expected=True)
description = clean_html(get_element_by_class('wall_post_text', webpage))
uploader = clean_html(get_element_by_class(
'fw_post_author', webpage)) or self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
entries = []
for audio in re.finditer(r'''(?sx)
<input[^>]+
id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+
value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2)
.+?
</table>''', webpage):
audio_html = audio.group(0)
audio_id = audio.group('id')
duration = parse_duration(get_element_by_class('duration', audio_html))
track = self._html_search_regex(
r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id,
audio_html, 'title', default=None)
artist = self._html_search_regex(
r'>([^<]+)</a></b>\s*&ndash', audio_html,
'artist', default=None)
entries.append({
'id': audio_id,
'url': audio.group('url'),
'title': '%s - %s' % (artist, track) if artist and track else audio_id,
'thumbnail': thumbnail,
'duration': duration,
'uploader': uploader,
'artist': artist,
'track': track,
})
for video in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
entries.append(self.url_result(
compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))
title = 'Wall post %s' % post_id
return self.playlist_result(
orderedSet(entries), post_id,
'%s - %s' % (uploader, title) if uploader else title,
description)

View File

@ -9,7 +9,6 @@ from ..utils import (
ExtractorError, ExtractorError,
unified_strdate, unified_strdate,
HEADRequest, HEADRequest,
float_or_none,
) )
@ -95,16 +94,7 @@ class WatIE(InfoExtractor):
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False)) video_id, f4m_id='hds', fatal=False))
for m3u8_format in m3u8_formats: for m3u8_format in m3u8_formats:
mobj = re.search( vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
if not mobj:
continue
abr, vbr = mobj.groups()
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
m3u8_format.update({
'vbr': vbr,
'abr': abr,
})
if not vbr or not abr: if not vbr or not abr:
continue continue
f = m3u8_format.copy() f = m3u8_format.copy()

View File

@ -858,6 +858,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
{ {
'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY', 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
'only_matching': True, 'only_matching': True,
},
{
# YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
'only_matching': True,
} }
] ]

View File

@ -2123,6 +2123,7 @@ def mimetype2ext(mt):
'dash+xml': 'mpd', 'dash+xml': 'mpd',
'f4m': 'f4m', 'f4m': 'f4m',
'f4m+xml': 'f4m', 'f4m+xml': 'f4m',
'vnd.ms-sstr+xml': 'ism',
}.get(res, res) }.get(res, res)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.07.11' __version__ = '2016.07.13'