diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 704a8b911..c5898701f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.11 +[debug] youtube-dl version 2016.07.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.travis.yml b/.travis.yml index 136c339f0..c74c9cc12 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,6 @@ python: - "3.4" - "3.5" sudo: false -install: - - bash ./devscripts/install_srelay.sh - - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 script: nosetests test --verbose notifications: email: diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py index 392e3ba21..fcd7e1dff 100755 --- a/devscripts/gh-pages/generate-download.py +++ b/devscripts/gh-pages/generate-download.py @@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read() with open('download.html.in', 'r', encoding='utf-8') as tmplf: template = tmplf.read() -md5sum = hashlib.md5(data).hexdigest() -sha1sum = hashlib.sha1(data).hexdigest() sha256sum = hashlib.sha256(data).hexdigest() template = template.replace('@PROGRAM_VERSION@', version) template = template.replace('@PROGRAM_URL@', URL) -template = template.replace('@PROGRAM_MD5SUM@', md5sum) -template = template.replace('@PROGRAM_SHA1SUM@', sha1sum) template = template.replace('@PROGRAM_SHA256SUM@', sha256sum) template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0]) template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1]) diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh deleted file mode 100755 index 33ce8a3f7..000000000 --- a/devscripts/install_srelay.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -mkdir -p tmp && cd tmp -wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz -tar zxvf srelay-0.4.8b6.tar.gz -cd srelay-0.4.8b6 -./configure -make diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5bcd6de1c..282bd0e6b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -568,6 +568,7 @@ - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **RTVNH** + - **Rudo** - **RUHD** - **RulePorn** - **rutube**: Rutube videos @@ -794,6 +795,7 @@ - **vine:user** - **vk**: VK - **vk:uservideos**: VK - User's Videos + - **vk:wallpost** - **vlive** - **Vodlocker** - **VoiceRepublic** diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 4b3cd8c65..23c6e505b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -55,12 +55,11 @@ class BBCCoUkIE(InfoExtractor): 'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'info_dict': { 'id': 'b039d07m', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', 'description': 'The Canadian poet and songwriter reflects on his musical career.', }, 'params': { - # rtmp download 'skip_download': True, } }, @@ -92,7 +91,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + 'skip': 'this episode is not currently available', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', @@ -107,7 +106,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + 'skip': 'this episode is not currently available', }, { 'url': 'http://www.bbc.co.uk/programmes/b04v20dw', 'info_dict': { @@ -127,13 +126,12 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Audio', 'info_dict': { 'id': 'p022h44j', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", 'duration': 227, }, 'params': { - # rtmp download 'skip_download': True, } }, { @@ -141,13 +139,12 @@ class BBCCoUkIE(InfoExtractor): 'note': 'Video', 'info_dict': { 'id': 'p025c103', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', 'duration': 226, }, 'params': { - # rtmp download 'skip_download': True, } }, { @@ -163,7 +160,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'geolocation', + 'skip': 'this episode is not currently available', }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition', 'info_dict': { @@ -177,7 +174,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'geolocation', + 'skip': 'this episode is not currently available', }, { # iptv-all mediaset fails with geolocation however there is no geo restriction # for this programme at all @@ -192,18 +189,17 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Now it\'s really geo-restricted', + 'skip': 'this episode is not currently available on BBC iPlayer Radio', }, { # compact player (https://github.com/rg3/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', 'info_dict': { 'id': 'p028bfkj', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', }, 'params': { - # rtmp download 'skip_download': True, }, }, { @@ -249,7 +245,7 @@ class BBCCoUkIE(InfoExtractor): pass elif transfer_format == 'hls': formats.extend(self._extract_m3u8_formats( - href, programme_id, ext='mp4', entry_protocol='m3u8_native', + href, programme_id, 'mp4', 'm3u8_native', m3u8_id=supplier, fatal=False)) # Direct link else: @@ -305,13 +301,14 @@ class BBCCoUkIE(InfoExtractor): for connection in self._extract_connections(media): conn_formats = self._extract_connection(connection, programme_id) for format in conn_formats: - format.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - 'vcodec': vcodec, - 'filesize': file_size, - }) + if format.get('protocol') != 'm3u8_native': + format.update({ + 'width': width, + 'height': height, + 'vbr': vbr, + 'vcodec': vcodec, + 'filesize': file_size, + }) if service: format['format_id'] = '%s_%s' % (service, format['format_id']) formats.extend(conn_formats) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 57ce0c174..aeb22be16 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -26,6 +26,8 @@ from ..utils import ( unescapeHTML, unsmuggle_url, update_url_query, + clean_html, + mimetype2ext, ) @@ -544,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor): formats = [] for source in json_data.get('sources', []): container = source.get('container') - source_type = source.get('type') + ext = mimetype2ext(source.get('type')) src = source.get('src') - if source_type == 'application/x-mpegURL' or container == 'M2TS': + if ext == 'ism': + continue + elif ext == 'm3u8' or container == 'M2TS': if not src: continue formats.extend(self._extract_m3u8_formats( src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif source_type == 'application/dash+xml': + elif ext == 'mpd': if not src: continue formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) @@ -567,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor): 'tbr': tbr, 'filesize': int_or_none(source.get('size')), 'container': container, - 'ext': container.lower(), + 'ext': ext or container.lower(), } if width == 0 and height == 0: f.update({ @@ -620,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': json_data.get('description'), + 'description': clean_html(json_data.get('description')), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'duration': float_or_none(json_data.get('duration'), 1000), 'timestamp': parse_iso8601(json_data.get('published_at')), diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index df546da27..29544c1a8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1207,6 +1207,7 @@ class InfoExtractor(object): 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, + 'fps': float_or_none(last_info.get('FRAME-RATE')), 'protocol': entry_protocol, 'preference': preference, } @@ -1215,24 +1216,17 @@ class InfoExtractor(object): width_str, height_str = resolution.split('x') f['width'] = int(width_str) f['height'] = int(height_str) - codecs = last_info.get('CODECS') - if codecs: - vcodec, acodec = [None] * 2 - va_codecs = codecs.split(',') - if len(va_codecs) == 1: - # Audio only entries usually come with single codec and - # no resolution. For more robustness we also check it to - # be mp4 audio. - if not resolution and va_codecs[0].startswith('mp4a'): - vcodec, acodec = 'none', va_codecs[0] - else: - vcodec = va_codecs[0] - else: - vcodec, acodec = va_codecs[:2] + # Unified Streaming Platform + mobj = re.search( + r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url']) + if mobj: + abr, vbr = mobj.groups() + abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) f.update({ - 'acodec': acodec, - 'vcodec': vcodec, + 'vbr': vbr, + 'abr': abr, }) + f.update(parse_codecs(last_info.get('CODECS'))) if last_media is not None: f['m3u8_media'] = last_media last_media = None diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 84b36f44c..7e5d4f227 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor): 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', 'info_dict': { 'id': 'judiciary031715', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Immigration Reforms Needed to Protect Skilled American Workers', + }, + 'params': { + 'skip_download': True, # m3u8 downloads } }] diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index 133cdc50b..caff8842e 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -4,78 +4,47 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - float_or_none, - int_or_none, - clean_html, -) class DBTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P[0-9]+)(?:#(?P.+))?' + _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P[0-9]+)(?:#(?P.+))?' _TESTS = [{ 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', + 'md5': '2e24f67936517b143a234b4cadf792ec', 'info_dict': { - 'id': '33100', + 'id': '3649835190001', 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', - 'thumbnail': 're:https?://.*\.jpg$', - 'timestamp': 1404039863.438, + 'thumbnail': 're:https?://.*\.jpg', + 'timestamp': 1404039863, 'upload_date': '20140629', 'duration': 69.544, - 'view_count': int, - 'categories': list, - } + 'uploader_id': '1027729757001', + }, + 'add_ie': ['BrightcoveNew'] }, { 'url': 'http://dbtv.no/3649835190001', 'only_matching': True, }, { 'url': 'http://www.dbtv.no/lazyplayer/4631135248001', 'only_matching': True, + }, { + 'url': 'http://dbtv.no/vice/5000634109001', + 'only_matching': True, + }, { + 'url': 'http://dbtv.no/filmtrailer/3359293614001', + 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id - - data = self._download_json( - 'http://api.dbtv.no/discovery/%s' % video_id, display_id) - - video = data['playlist'][0] - - formats = [{ - 'url': f['URL'], - 'vcodec': f.get('container'), - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'vbr': float_or_none(f.get('rate'), 1000), - 'filesize': int_or_none(f.get('size')), - } for f in video['renditions'] if 'URL' in f] - - if not formats: - for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]: - if url_key in video: - formats.append({ - 'url': video[url_key], - 'format_id': format_id, - }) - - self._sort_formats(formats) + video_id, display_id = re.match(self._VALID_URL, url).groups() return { - 'id': compat_str(video['id']), + '_type': 'url_transparent', + 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id, + 'id': video_id, 'display_id': display_id, - 'title': video['title'], - 'description': clean_html(video['desc']), - 'thumbnail': video.get('splash') or video.get('thumb'), - 'timestamp': float_or_none(video.get('publishedAt'), 1000), - 'duration': float_or_none(video.get('length'), 1000), - 'view_count': int_or_none(video.get('views')), - 'categories': video.get('tags'), - 'formats': formats, + 'ie_key': 'BrightcoveNew', } diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 0040e70d4..908c9e514 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE): 'ext': 'mp4', 'title': 'Waidmannsheil', 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', - 'uploader': '3sat', + 'uploader': 'SCHWEIZWEIT', + 'uploader_id': '100000210', 'upload_date': '20140913' + }, + 'params': { + 'skip_download': True, # m3u8 downloads } }, { diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 4c8190d68..74bbc5c51 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -6,12 +6,13 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, + NO_DEFAULT, ) class EllenTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P[a-z0-9_-]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', 'md5': '4294cf98bc165f218aaa0b89e0fd8042', 'info_dict': { @@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor): 'timestamp': 1428035648, 'upload_date': '20150403', 'uploader_id': 'batchUser', - } - } + }, + }, { + # not available via http://widgets.ellentube.com/ + 'url': 'http://www.ellentv.com/videos/1-szkgu2m2/', + 'info_dict': { + 'id': '1_szkgu2m2', + 'ext': 'flv', + 'title': "Ellen's Amazingly Talented Audience", + 'description': 'md5:86ff1e376ff0d717d7171590e273f0a5', + 'timestamp': 1255140900, + 'upload_date': '20091010', + 'uploader_id': 'ellenkaltura@gmail.com', + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://widgets.ellentube.com/videos/%s' % video_id, - video_id) + URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url) - partner_id = self._search_regex( - r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id') + for num, url_ in enumerate(URLS, 1): + webpage = self._download_webpage( + url_, video_id, fatal=num == len(URLS)) - kaltura_id = self._search_regex( - [r'id="kaltura_player_([^"]+)"', - r"_wb_entry_id\s*:\s*'([^']+)", - r'data-kaltura-entry-id="([^"]+)'], - webpage, 'kaltura id') + default = NO_DEFAULT if num == len(URLS) else None + + partner_id = self._search_regex( + r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id', + default=default) + + kaltura_id = self._search_regex( + [r'id="kaltura_player_([^"]+)"', + r"_wb_entry_id\s*:\s*'([^']+)", + r'data-kaltura-entry-id="([^"]+)'], + webpage, 'kaltura id', default=default) + + if partner_id and kaltura_id: + break return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b08df41b4..45817d7df 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -537,6 +537,7 @@ from .nick import ( from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE +from .ninenow import NineNowIE from .noco import NocoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE @@ -689,7 +690,7 @@ from .rtlnl import RtlNlIE from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE +from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE from .rudo import RudoIE from .ruhd import RUHDIE @@ -989,6 +990,7 @@ from .viki import ( from .vk import ( VKIE, VKUserVideosIE, + VKWallPostIE, ) from .vlive import VLiveIE from .vodlocker import VodlockerIE diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 621257c9f..4e859e09a 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -28,10 +28,13 @@ class GameSpotIE(OnceIE): 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', 'info_dict': { 'id': 'gs-2300-6424837', - 'ext': 'flv', - 'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing', + 'ext': 'mp4', + 'title': 'Now Playing - The Witcher 3: Wild Hunt', 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py new file mode 100644 index 000000000..faa577237 --- /dev/null +++ b/youtube_dl/extractor/ninenow.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + float_or_none, + ExtractorError, +) + + +class NineNowIE(InfoExtractor): + IE_NAME = '9now.com.au' + _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P[^/?#]+)' + _TESTS = [{ + # clip + 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', + 'md5': '17cf47d63ec9323e562c9957a968b565', + 'info_dict': { + 'id': '16801', + 'ext': 'mp4', + 'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike', + 'description': 'Is a boycott of the NAB Cup "on the table"?', + 'uploader_id': '4460760524001', + 'upload_date': '20160713', + 'timestamp': 1468421266, + }, + 'skip': 'Only available in Australia', + }, { + # episode + 'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19', + 'only_matching': True, + }, { + # DRM protected + 'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + page_data = self._parse_json(self._search_regex( + r'window\.__data\s*=\s*({.*?});', webpage, + 'page data'), display_id) + common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') + video_data = common_data['video'] + + if video_data.get('drm'): + raise ExtractorError('This video is DRM protected.', expected=True) + + brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId'] + video_id = compat_str(video_data.get('id') or brightcove_id) + title = common_data['name'] + + thumbnails = [{ + 'id': thumbnail_id, + 'url': thumbnail_url, + 'width': int_or_none(thumbnail_id[1:]) + } for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()] + + return { + '_type': 'url_transparent', + 'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'id': video_id, + 'title': title, + 'description': common_data.get('description'), + 'duration': float_or_none(video_data.get('duration'), 1000), + 'thumbnails': thumbnails, + 'ie_key': 'BrightcoveNew', + } diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 4e3864f0d..ccb23e069 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -40,16 +40,16 @@ class ORFTVthekIE(InfoExtractor): 'skip': 'Blocked outside of Austria / Germany', }, { 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'playlist': [{ - 'md5': '68f543909aea49d621dfc7703a11cfaf', - 'info_dict': { - 'id': '7982259', - 'ext': 'mp4', - 'title': 'Best of Ingrid Thurnher', - 'upload_date': '20140527', - 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', - } - }], + 'info_dict': { + 'id': '7982259', + 'ext': 'mp4', + 'title': 'Best of Ingrid Thurnher', + 'upload_date': '20140527', + 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', + }, + 'params': { + 'skip_download': True, # rtsp downloads + }, '_skip': 'Blocked outside of Austria / Germany', }] diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f11e3588b..d33b05f5d 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -113,9 +113,7 @@ class RTVEALaCartaIE(InfoExtractor): png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): - video_url = video_url.replace( - 'resources/', 'auth/resources/' - ).replace('.net.rtve', '.multimedia.cdn.rtve') + video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') subtitles = None if info.get('sbtFile') is not None: @@ -222,3 +220,34 @@ class RTVELiveIE(InfoExtractor): 'formats': formats, 'is_live': True, } + + +class RTVETelevisionIE(InfoExtractor): + IE_NAME = 'rtve.es:television' + _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P\d+).shtml' + + _TEST = { + 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', + 'info_dict': { + 'id': '3069778', + 'ext': 'mp4', + 'title': 'Documentos TV - La revolución del móvil', + 'duration': 3496.948, + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + alacarta_url = self._search_regex( + r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', + webpage, 'alacarta url', default=None) + if alacarta_url is None: + raise ExtractorError( + 'The webpage doesn\'t contain any video', expected=True) + + return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index d95ea06be..ca286abb1 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, parse_iso8601, + str_or_none, ) @@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor): 'only_matching': True }] - def _handle_error(self, response): - if not isinstance(response, dict): - return - error = response.get('error') + def _call_api(self, path, video_id, note): + data = self._download_json( + 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }).get('data', {}) + + error = data.get('error') if error: raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), expected=True) - def _download_json(self, url, video_id, note='Downloading JSON metadata'): - response = super(ShahidIE, self)._download_json(url, video_id, note)['data'] - self._handle_error(response) - return response + return data def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - api_vars = { - 'id': video_id, - 'type': 'player', - 'url': 'http://api.shahid.net/api/v1_1', - 'playerType': 'episode', - } - - flashvars = self._search_regex( - r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None) - if flashvars: - for key in api_vars.keys(): - value = self._search_regex( - r'\b%s\s*:\s*(?P["\'])(?P.+?)(?P=q)' % key, - flashvars, 'type', default=None, group='value') - if value: - api_vars[key] = value - - player = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' - % (video_id, api_vars['type']), video_id, 'Downloading player JSON') + player = self._call_api( + 'Content/Episode/%s' % video_id, + video_id, 'Downloading player JSON') if player.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._download_json( - '%s/%s/%s?%s' % ( - api_vars['url'], api_vars['playerType'], api_vars['id'], - compat_urllib_parse_urlencode({ - 'apiKey': 'sh@hid0nlin3', - 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - })), - video_id, 'Downloading video JSON') - - video = video[api_vars['playerType']] + video = self._call_api( + 'episode/%s' % video_id, video_id, + 'Downloading video JSON')['episode'] title = video['title'] - description = video.get('description') - thumbnail = video.get('thumbnailUrl') - duration = int_or_none(video.get('duration')) - timestamp = parse_iso8601(video.get('referenceDate')) categories = [ category['name'] for category in video.get('genres', []) if 'name' in category] @@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, + 'description': video.get('description'), + 'thumbnail': video.get('thumbnailUrl'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('referenceDate')), 'categories': categories, + 'series': video.get('showTitle') or video.get('showName'), + 'season': video.get('seasonTitle'), + 'season_number': int_or_none(video.get('seasonNumber')), + 'season_id': str_or_none(video.get('seasonId')), + 'episode_number': int_or_none(video.get('number')), + 'episode_id': video_id, 'formats': formats, } diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 5ca079f88..53723b66e 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -1,46 +1,56 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor +from .theplatform import ThePlatformIE +from ..utils import ( + update_url_query, + smuggle_url, +) -class SyfyIE(InfoExtractor): - _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P[0-9]+)|(?!videos)(?P[^/]+)(?:$|[?#]))' - +class SyfyIE(ThePlatformIE): + _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)' _TESTS = [{ - 'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', + 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', 'info_dict': { - 'id': 'NmqMrGnXvmO1', - 'ext': 'flv', - 'title': 'George Lucas has Advice for his Daughter', - 'description': 'Listen to what insights George Lucas give his daughter Amanda.', + 'id': '2968097', + 'ext': 'mp4', + 'title': 'The Internet Ruined My Life: Season 1 Trailer', + 'description': 'One tweet, one post, one click, can destroy everything.', + 'uploader': 'NBCU-MPAT', + 'upload_date': '20170113', + 'timestamp': 1484345640, + }, + 'params': { + # m3u8 download + 'skip_download': True, }, 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.syfy.com/wilwheaton', - 'md5': '94dfa54ee3ccb63295b276da08c415f6', - 'info_dict': { - 'id': '4yoffOOXC767', - 'ext': 'flv', - 'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.', - 'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.', - }, - 'add_ie': ['ThePlatform'], - 'skip': 'Blocked outside the US', }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_name = mobj.group('video_name') - if video_name: - generic_webpage = self._download_webpage(url, video_name) - video_id = self._search_regex( - r'', - generic_webpage, 'video ID') - url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % ( - video_name, video_name, video_id) - else: - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage)) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + syfy_mpx = list(self._parse_json(self._search_regex( + r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + display_id)['syfy']['syfy_mpx'].values())[0] + video_id = syfy_mpx['mpxGUID'] + title = syfy_mpx['episodeTitle'] + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + if syfy_mpx.get('entitlement') == 'auth': + resource = 'syfy<![CDATA[%s]]>%s%s' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, 'syfy', resource) + + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url(update_url_query( + self._proto_relative_url(syfy_mpx['releaseURL']), query), + {'force_smil_url': True}), + 'title': title, + 'id': video_id, + 'display_id': display_id, + } diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py index a0bc12c81..f26937da1 100644 --- a/youtube_dl/extractor/threeqsdn.py +++ b/youtube_dl/extractor/threeqsdn.py @@ -24,16 +24,20 @@ class ThreeQSDNIE(InfoExtractor): 'title': '0280d6b9-1215-11e6-b427-0cc47a188158', 'is_live': False, }, - 'expected_warnings': ['Failed to download MPD manifest'], + 'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'], }, { # live video stream 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', 'info_dict': { 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', 'ext': 'mp4', - 'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', - 'is_live': False, + 'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, + 'expected_warnings': ['Failed to download MPD manifest'], }, { # live audio stream 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', @@ -114,7 +118,7 @@ class ThreeQSDNIE(InfoExtractor): 'vcodec': 'none' if stream_type == 'audio' else None, }) - for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): + for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js): f = self._parse_json( item_js, video_id, transform_source=js_to_json, fatal=False) if not f: diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py index 7dbe68b5c..979856e9a 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -5,31 +5,27 @@ from .common import InfoExtractor class TMZIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P[^/]+)/?' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P[^/?#]+)' + _TESTS = [{ 'url': 'http://www.tmz.com/videos/0_okj015ty/', - 'md5': '791204e3bf790b1426cb2db0706184c0', + 'md5': '4d22a51ef205b6c06395d8394f72d560', 'info_dict': { 'id': '0_okj015ty', - 'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4', 'ext': 'mp4', 'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', 'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?', - 'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*', + 'timestamp': 1394747163, + 'uploader_id': 'batchUser', + 'upload_date': '20140313', } - } + }, { + 'url': 'http://www.tmz.com/videos/0-cegprt2p/', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - return { - 'id': video_id, - 'url': self._html_search_meta('VideoURL', webpage, fatal=True), - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._html_search_meta('ThumbURL', webpage), - } + video_id = self._match_id(url).replace('-', '_') + return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id) class TMZArticleIE(InfoExtractor): diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index df70a6b23..918f8f8bc 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -8,43 +8,36 @@ from ..compat import compat_str from ..utils import ( parse_iso8601, qualities, + determine_ext, + update_url_query, + int_or_none, ) class TVPlayIE(InfoExtractor): IE_DESC = 'TV3Play and related services' _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:tvplay\.lv/parraides| - tv3play\.lt/programos| - play\.tv3\.lt/programos| - tv3play\.ee/sisu| - tv3play\.se/program| - tv6play\.se/program| - tv8play\.se/program| - tv10play\.se/program| - tv3play\.no/programmer| - viasat4play\.no/programmer| - tv6play\.no/programmer| - tv3play\.dk/programmer| + (?:tvplay(?:\.skaties)?\.lv/parraides| + (?:tv3play|play\.tv3)\.lt/programos| + tv3play(?:\.tv3)?\.ee/sisu| + tv(?:3|6|8|10)play\.se/program| + (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer| play\.novatv\.bg/programi )/[^/]+/(?P\d+) ''' _TESTS = [ { 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', + 'md5': 'a1612fe0849455423ad8718fe049be21', 'info_dict': { 'id': '418113', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Kādi ir īri? - Viņas melo labāk', 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.', 'duration': 25, 'timestamp': 1406097056, 'upload_date': '20140723', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true', @@ -82,7 +75,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true', 'info_dict': { 'id': '395385', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Husräddarna S02E07', 'description': 'md5:f210c6c89f42d4fc39faa551be813777', 'duration': 2574, @@ -90,7 +83,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20140520', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -98,7 +90,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true', 'info_dict': { 'id': '266636', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Den sista dokusåpan S01E08', 'description': 'md5:295be39c872520221b933830f660b110', 'duration': 1492, @@ -107,7 +99,6 @@ class TVPlayIE(InfoExtractor): 'age_limit': 18, }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -115,7 +106,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true', 'info_dict': { 'id': '282756', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Antikjakten S01E10', 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8', 'duration': 2646, @@ -123,7 +114,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20120925', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -131,7 +121,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true', 'info_dict': { 'id': '230898', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Anna Anka søker assistent - Ep. 8', 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474', 'duration': 2656, @@ -139,7 +129,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20100628', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -147,7 +136,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true', 'info_dict': { 'id': '21873', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Budbringerne program 10', 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d', 'duration': 1297, @@ -155,7 +144,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20090929', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -163,7 +151,7 @@ class TVPlayIE(InfoExtractor): 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true', 'info_dict': { 'id': '361883', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Hotelinspektør Alex Polizzi - Ep. 10', 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81', 'duration': 2594, @@ -171,7 +159,6 @@ class TVPlayIE(InfoExtractor): 'upload_date': '20140224', }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -191,6 +178,14 @@ class TVPlayIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', + 'only_matching': True, + }, + { + 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', + 'only_matching': True, + } ] def _real_extract(self, url): @@ -199,7 +194,9 @@ class TVPlayIE(InfoExtractor): video = self._download_json( 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') - if video['is_geo_blocked']: + title = video['title'] + + if video.get('is_geo_blocked'): self.report_warning( 'This content might not be available in your country due to copyright reasons') @@ -208,42 +205,50 @@ class TVPlayIE(InfoExtractor): quality = qualities(['hls', 'medium', 'high']) formats = [] - for format_id, video_url in streams['streams'].items(): + for format_id, video_url in streams.get('streams', {}).items(): if not video_url or not isinstance(video_url, compat_str): continue - fmt = { - 'format_id': format_id, - 'preference': quality(format_id), - } - if video_url.startswith('rtmp'): - m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) - if not m: - continue - fmt.update({ - 'ext': 'flv', - 'url': m.group('url'), - 'app': m.group('app'), - 'play_path': m.group('playpath'), - }) - elif video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( - video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id)) - continue + update_url_query(video_url, { + 'hdcore': '3.5.0', + 'plugin': 'aasp-3.5.0.151.81' + }), video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) else: - fmt.update({ - 'url': video_url, - }) - formats.append(fmt) - + fmt = { + 'format_id': format_id, + 'quality': quality(format_id), + 'ext': ext, + } + if video_url.startswith('rtmp'): + m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) + if not m: + continue + fmt.update({ + 'ext': 'flv', + 'url': m.group('url'), + 'app': m.group('app'), + 'play_path': m.group('playpath'), + }) + else: + fmt.update({ + 'url': video_url, + }) + formats.append(fmt) self._sort_formats(formats) return { 'id': video_id, - 'title': video['title'], - 'description': video['description'], - 'duration': video['duration'], - 'timestamp': parse_iso8601(video['created_at']), - 'view_count': video['views']['total'], - 'age_limit': video.get('age_limit', 0), + 'title': title, + 'description': video.get('description'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('created_at')), + 'view_count': int_or_none(video.get('views', {}).get('total')), + 'age_limit': int_or_none(video.get('age_limit', 0)), 'formats': formats, } diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 758d9c86b..3ee66e23e 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -6,11 +6,18 @@ import json import sys from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( + clean_html, ExtractorError, + get_element_by_class, int_or_none, orderedSet, + parse_duration, + remove_start, str_to_int, unescapeHTML, unified_strdate, @@ -20,7 +27,55 @@ from .vimeo import VimeoIE from .pladform import PladformIE -class VKIE(InfoExtractor): +class VKBaseIE(InfoExtractor): + _NETRC_MACHINE = 'vk' + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_page, url_handle = self._download_webpage_handle( + 'https://vk.com', None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username.encode('cp1251'), + 'pass': password.encode('cp1251'), + }) + + # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header + # and expects the first one to be set rather than second (see + # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). + # As of RFC6265 the newer one cookie should be set into cookie store + # what actually happens. + # We will workaround this VK issue by resetting the remixlhk cookie to + # the first one manually. + cookies = url_handle.headers.get('Set-Cookie') + if cookies: + if sys.version_info[0] >= 3: + cookies = cookies.encode('iso-8859-1') + cookies = cookies.decode('utf-8') + remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) + if remixlhk: + value, domain = remixlhk.groups() + self._set_cookie(domain, 'remixlhk', value) + + login_page = self._download_webpage( + 'https://login.vk.com/?act=login', None, + note='Logging in as %s' % username, + data=urlencode_postdata(login_form)) + + if re.search(r'onLoginFailed', login_page): + raise ExtractorError( + 'Unable to login, incorrect username and/or password', expected=True) + + def _real_initialize(self): + self._login() + + +class VKIE(VKBaseIE): IE_NAME = 'vk' IE_DESC = 'VK' _VALID_URL = r'''(?x) @@ -38,8 +93,6 @@ class VKIE(InfoExtractor): (?P-?\d+_\d+)(?:.*\blist=(?P[\da-f]+))? ) ''' - _NETRC_MACHINE = 'vk' - _TESTS = [ { 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', @@ -189,49 +242,6 @@ class VKIE(InfoExtractor): } ] - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - - login_page, url_handle = self._download_webpage_handle( - 'https://vk.com', None, 'Downloading login page') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - 'email': username.encode('cp1251'), - 'pass': password.encode('cp1251'), - }) - - # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header - # and expects the first one to be set rather than second (see - # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). - # As of RFC6265 the newer one cookie should be set into cookie store - # what actually happens. - # We will workaround this VK issue by resetting the remixlhk cookie to - # the first one manually. - cookies = url_handle.headers.get('Set-Cookie') - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') - remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) - if remixlhk: - value, domain = remixlhk.groups() - self._set_cookie(domain, 'remixlhk', value) - - login_page = self._download_webpage( - 'https://login.vk.com/?act=login', None, - note='Logging in as %s' % username, - data=urlencode_postdata(login_form)) - - if re.search(r'onLoginFailed', login_page): - raise ExtractorError( - 'Unable to login, incorrect username and/or password', expected=True) - - def _real_initialize(self): - self._login() - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') @@ -355,7 +365,7 @@ class VKIE(InfoExtractor): } -class VKUserVideosIE(InfoExtractor): +class VKUserVideosIE(VKBaseIE): IE_NAME = 'vk:uservideos' IE_DESC = "VK - User's Videos" _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' @@ -396,3 +406,121 @@ class VKUserVideosIE(InfoExtractor): webpage, 'title', default=page_id)) return self.playlist_result(entries, page_id, title) + + +class VKWallPostIE(VKBaseIE): + IE_NAME = 'vk:wallpost' + _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P-?\d+_\d+)))' + _TESTS = [{ + # public page URL, audio playlist + 'url': 'https://vk.com/bs.official?w=wall-23538238_35', + 'info_dict': { + 'id': '23538238_35', + 'title': 'Black Shadow - Wall post 23538238_35', + 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', + }, + 'playlist': [{ + 'md5': '5ba93864ec5b85f7ce19a9af4af080f6', + 'info_dict': { + 'id': '135220665_111806521', + 'ext': 'mp3', + 'title': 'Black Shadow - Слепое Верование', + 'duration': 370, + 'uploader': 'Black Shadow', + 'artist': 'Black Shadow', + 'track': 'Слепое Верование', + }, + }, { + 'md5': '4cc7e804579122b17ea95af7834c9233', + 'info_dict': { + 'id': '135220665_111802303', + 'ext': 'mp3', + 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', + 'duration': 423, + 'uploader': 'Black Shadow', + 'artist': 'Black Shadow', + 'track': 'Война - Негасимое Бездны Пламя!', + }, + 'params': { + 'skip_download': True, + }, + }], + 'skip': 'Requires vk account credentials', + }, { + # single YouTube embed, no leading - + 'url': 'https://vk.com/wall85155021_6319', + 'info_dict': { + 'id': '85155021_6319', + 'title': 'Sergey Gorbunov - Wall post 85155021_6319', + }, + 'playlist_count': 1, + 'skip': 'Requires vk account credentials', + }, { + # wall page URL + 'url': 'https://vk.com/wall-23538238_35', + 'only_matching': True, + }, { + # mobile wall page URL + 'url': 'https://m.vk.com/wall-23538238_35', + 'only_matching': True, + }] + + def _real_extract(self, url): + post_id = self._match_id(url) + + wall_url = 'https://vk.com/wall%s' % post_id + + post_id = remove_start(post_id, '-') + + webpage = self._download_webpage(wall_url, post_id) + + error = self._html_search_regex( + r'>Error\s*]+class=["\']body["\'][^>]*>([^<]+)', + webpage, 'error', default=None) + if error: + raise ExtractorError('VK said: %s' % error, expected=True) + + description = clean_html(get_element_by_class('wall_post_text', webpage)) + uploader = clean_html(get_element_by_class( + 'fw_post_author', webpage)) or self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + entries = [] + + for audio in re.finditer(r'''(?sx) + ]+ + id=(?P["\'])audio_info(?P\d+_\d+).*?(?P=q1)[^>]+ + value=(?P["\'])(?Phttp.+?)(?P=q2) + .+? + ''', webpage): + audio_html = audio.group(0) + audio_id = audio.group('id') + duration = parse_duration(get_element_by_class('duration', audio_html)) + track = self._html_search_regex( + r']+id=["\']title%s[^>]*>([^<]+)' % audio_id, + audio_html, 'title', default=None) + artist = self._html_search_regex( + r'>([^<]+)\s*&ndash', audio_html, + 'artist', default=None) + entries.append({ + 'id': audio_id, + 'url': audio.group('url'), + 'title': '%s - %s' % (artist, track) if artist and track else audio_id, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'artist': artist, + 'track': track, + }) + + for video in re.finditer( + r']+href=(["\'])(?P/video(?:-?[\d_]+).*?)\1', webpage): + entries.append(self.url_result( + compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key())) + + title = 'Wall post %s' % post_id + + return self.playlist_result( + orderedSet(entries), post_id, + '%s - %s' % (uploader, title) if uploader else title, + description) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index de7d6b559..48fc438ed 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -9,7 +9,6 @@ from ..utils import ( ExtractorError, unified_strdate, HEADRequest, - float_or_none, ) @@ -95,16 +94,7 @@ class WatIE(InfoExtractor): m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), video_id, f4m_id='hds', fatal=False)) for m3u8_format in m3u8_formats: - mobj = re.search( - r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url']) - if not mobj: - continue - abr, vbr = mobj.groups() - abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) - m3u8_format.update({ - 'vbr': vbr, - 'abr': abr, - }) + vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') if not vbr or not abr: continue f = m3u8_format.copy() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1687d5ef9..49c264c3a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -858,6 +858,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): { 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', 'only_matching': True, + }, + { + # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) + 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', + 'only_matching': True, } ] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0fd1e71b4..42028125b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2123,6 +2123,7 @@ def mimetype2ext(mt): 'dash+xml': 'mpd', 'f4m': 'f4m', 'f4m+xml': 'f4m', + 'vnd.ms-sstr+xml': 'ism', }.get(res, res) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d60480223..56f9f5986 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.11' +__version__ = '2016.07.13'