This commit is contained in:
Gilles Habran 2016-07-29 08:47:46 +02:00
commit 8ddff4c81a
40 changed files with 724 additions and 552 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.17** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.28**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.07.17 [debug] youtube-dl version 2016.07.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -178,3 +178,4 @@ Artur Krysiak
Jakub Adam Wieczorek Jakub Adam Wieczorek
Aleksandar Topuzović Aleksandar Topuzović
Nehal Patel Nehal Patel
Rob van Bekkum

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import json import json
import os import os
import re import re
@ -21,21 +22,26 @@ def format_size(bytes):
total_bytes = 0 total_bytes = 0
releases = json.loads(compat_urllib_request.urlopen( for page in itertools.count(1):
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8')) releases = json.loads(compat_urllib_request.urlopen(
'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
).read().decode('utf-8'))
for release in releases: if not releases:
compat_print(release['name']) break
for asset in release['assets']:
asset_name = asset['name'] for release in releases:
total_bytes += asset['download_count'] * asset['size'] compat_print(release['name'])
if all(not re.match(p, asset_name) for p in ( for asset in release['assets']:
r'^youtube-dl$', asset_name = asset['name']
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$', total_bytes += asset['download_count'] * asset['size']
r'^youtube-dl\.exe$')): if all(not re.match(p, asset_name) for p in (
continue r'^youtube-dl$',
compat_print( r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
' %s size: %s downloads: %d' r'^youtube-dl\.exe$')):
% (asset_name, format_size(asset['size']), asset['download_count'])) continue
compat_print(
' %s size: %s downloads: %d'
% (asset_name, format_size(asset['size']), asset['download_count']))
compat_print('total downloads traffic: %s' % format_size(total_bytes)) compat_print('total downloads traffic: %s' % format_size(total_bytes))

View File

@ -46,6 +46,7 @@
- **archive.org**: archive.org videos - **archive.org**: archive.org videos
- **ARD** - **ARD**
- **ARD:mediathek** - **ARD:mediathek**
- **Arkena**
- **arte.tv** - **arte.tv**
- **arte.tv:+7** - **arte.tv:+7**
- **arte.tv:cinema** - **arte.tv:cinema**
@ -141,7 +142,7 @@
- **CollegeRama** - **CollegeRama**
- **ComCarCoff** - **ComCarCoff**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralShows**: The Daily Show / The Colbert Report - **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **Coub** - **Coub**
- **Cracked** - **Cracked**
@ -336,6 +337,8 @@
- **kuwo:song**: 酷我音乐 - **kuwo:song**: 酷我音乐
- **la7.it** - **la7.it**
- **Laola1Tv** - **Laola1Tv**
- **Lcp**
- **LcpPlay**
- **Le**: 乐视网 - **Le**: 乐视网
- **Learnr** - **Learnr**
- **Lecture2Go** - **Lecture2Go**
@ -397,7 +400,6 @@
- **MSN** - **MSN**
- **MTV** - **MTV**
- **mtv.de** - **mtv.de**
- **mtviggy.com**
- **mtvservices:embedded** - **mtvservices:embedded**
- **MuenchenTV**: münchen.tv - **MuenchenTV**: münchen.tv
- **MusicPlayOn** - **MusicPlayOn**
@ -437,7 +439,6 @@
- **Newstube** - **Newstube**
- **NextMedia**: 蘋果日報 - **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextMediaActionNews**: 蘋果日報 - 動新聞
- **nextmovie.com**
- **nfb**: National Film Board of Canada - **nfb**: National Film Board of Canada
- **nfl.com** - **nfl.com**
- **nhl.com** - **nhl.com**
@ -477,6 +478,7 @@
- **NYTimes** - **NYTimes**
- **NYTimesArticle** - **NYTimesArticle**
- **ocw.mit.edu** - **ocw.mit.edu**
- **OdaTV**
- **Odnoklassniki** - **Odnoklassniki**
- **OktoberfestTV** - **OktoberfestTV**
- **on.aol.com** - **on.aol.com**
@ -694,6 +696,7 @@
- **TNAFlix** - **TNAFlix**
- **TNAFlixNetworkEmbed** - **TNAFlixNetworkEmbed**
- **toggle** - **toggle**
- **Tosh**: Tosh.0
- **tou.tv** - **tou.tv**
- **Toypics**: Toypics user profile - **Toypics**: Toypics user profile
- **ToypicsUser**: Toypics user profile - **ToypicsUser**: Toypics user profile

View File

@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':ythistory', ['youtube:history']) self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':tds', ['ComedyCentralShows'])
def test_vimeo_matching(self): def test_vimeo_matching(self):
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])

View File

@ -73,6 +73,7 @@ class ARDMediathekIE(InfoExtractor):
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb', 'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
'duration': 3287, 'duration': 3287,
}, },
'skip': 'Video is no longer available',
}] }]
def _extract_media_info(self, media_info_url, webpage, video_id): def _extract_media_info(self, media_info_url, webpage, video_id):

View File

@ -0,0 +1,115 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
mimetype2ext,
parse_iso8601,
strip_jsonp,
)
class ArkenaIE(InfoExtractor):
_VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
_TESTS = [{
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
'info_dict': {
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': 'Royalty free test video',
'timestamp': 1432816365,
'upload_date': '20150528',
'is_live': False,
},
}, {
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
'only_matching': True,
}, {
'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
'only_matching': True,
}, {
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
account_id = mobj.group('account_id')
playlist = self._download_json(
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
% (video_id, account_id),
video_id, transform_source=strip_jsonp)['Playlist'][0]
media_info = playlist['MediaInfo']
title = media_info['Title']
media_files = playlist['MediaFiles']
is_live = False
formats = []
for kind_case, kind_formats in media_files.items():
kind = kind_case.lower()
for f in kind_formats:
f_url = f.get('Url')
if not f_url:
continue
is_live = f.get('Live') == 'true'
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
if kind == 'm3u8' or 'm3u8' in exts:
formats.extend(self._extract_m3u8_formats(
f_url, video_id, 'mp4',
entry_protocol='m3u8' if is_live else 'm3u8_native',
m3u8_id=kind, fatal=False, live=is_live))
elif kind == 'flash' or 'f4m' in exts:
formats.extend(self._extract_f4m_formats(
f_url, video_id, f4m_id=kind, fatal=False))
elif kind == 'dash' or 'mpd' in exts:
formats.extend(self._extract_mpd_formats(
f_url, video_id, mpd_id=kind, fatal=False))
elif kind == 'silverlight':
# TODO: process when ism is supported (see
# https://github.com/rg3/youtube-dl/issues/8118)
continue
else:
tbr = float_or_none(f.get('Bitrate'), 1000)
formats.append({
'url': f_url,
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
'tbr': tbr,
})
self._sort_formats(formats)
description = media_info.get('Description')
video_id = media_info.get('VideoId') or video_id
timestamp = parse_iso8601(media_info.get('PublishDate'))
thumbnails = [{
'url': thumbnail['Url'],
'width': int_or_none(thumbnail.get('Size')),
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
return {
'id': video_id,
'title': title,
'description': description,
'timestamp': timestamp,
'is_live': is_live,
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -12,7 +12,7 @@ class BigflixIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537', 'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
'md5': 'ec76aa9b1129e2e5b301a474e54fab74', 'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3',
'info_dict': { 'info_dict': {
'id': '16537', 'id': '16537',
'ext': 'mp4', 'ext': 'mp4',
@ -26,7 +26,7 @@ class BigflixIE(InfoExtractor):
'id': '16070', 'id': '16070',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Madarasapatinam', 'title': 'Madarasapatinam',
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca', 'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
'formats': 'mincount:2', 'formats': 'mincount:2',
}, },
'params': { 'params': {

View File

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -10,8 +9,10 @@ from ..compat import (
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
parse_iso8601, clean_html,
parse_duration,
str_to_int, str_to_int,
unified_strdate,
) )
@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ch1-1 Introduction, Signals (02-23-2012)', 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'description': '',
'creator': 'ss11spring', 'creator': 'ss11spring',
'duration': 1591,
'upload_date': '20130114', 'upload_date': '20130114',
'timestamp': 1358154556,
'view_count': int, 'view_count': int,
} }
}, { }, {
# With non-empty description # With non-empty description
# webpage returns "No permission or not login"
'url': 'http://www.camdemy.com/media/13885', 'url': 'http://www.camdemy.com/media/13885',
'md5': '4576a3bb2581f86c61044822adbd1249', 'md5': '4576a3bb2581f86c61044822adbd1249',
'info_dict': { 'info_dict': {
@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'EverCam + Camdemy QuickStart', 'title': 'EverCam + Camdemy QuickStart',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9', 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
'creator': 'evercam', 'creator': 'evercam',
'upload_date': '20140620', 'duration': 318,
'timestamp': 1403271569,
} }
}, { }, {
# External source # External source (YouTube)
'url': 'http://www.camdemy.com/media/14842', 'url': 'http://www.camdemy.com/media/14842',
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
'info_dict': { 'info_dict': {
'id': '2vsYQzNIsJo', 'id': '2vsYQzNIsJo',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Excel 2013 Tutorial - How to add Password Protection',
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
'upload_date': '20130211', 'upload_date': '20130211',
'uploader': 'Hun Kim', 'uploader': 'Hun Kim',
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
'uploader_id': 'hunkimtutorials', 'uploader_id': 'hunkimtutorials',
'title': 'Excel 2013 Tutorial - How to add Password Protection', },
} 'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, video_id)
src_from = self._html_search_regex( src_from = self._html_search_regex(
r"<div class='srcFrom'>Source: <a title='([^']+)'", page, r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
'external source', default=None) webpage, 'external source', default=None, group='url')
if src_from: if src_from:
return self.url_result(src_from) return self.url_result(src_from)
oembed_obj = self._download_json( oembed_obj = self._download_json(
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
title = oembed_obj['title']
thumb_url = oembed_obj['thumbnail_url'] thumb_url = oembed_obj['thumbnail_url']
video_folder = compat_urlparse.urljoin(thumb_url, 'video/') video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
file_list_doc = self._download_xml( file_list_doc = self._download_xml(
compat_urlparse.urljoin(video_folder, 'fileList.xml'), compat_urlparse.urljoin(video_folder, 'fileList.xml'),
video_id, 'Filelist XML') video_id, 'Downloading filelist XML')
file_name = file_list_doc.find('./video/item/fileName').text file_name = file_list_doc.find('./video/item/fileName').text
video_url = compat_urlparse.urljoin(video_folder, file_name) video_url = compat_urlparse.urljoin(video_folder, file_name)
timestamp = parse_iso8601(self._html_search_regex( # Some URLs return "No permission or not login" in a webpage despite being
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
page, 'creation time', fatal=False), upload_date = unified_strdate(self._search_regex(
delimiter=' ', timezone=datetime.timedelta(hours=8)) r'>published on ([^<]+)<', webpage,
view_count = str_to_int(self._html_search_regex( 'upload date', default=None))
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", view_count = str_to_int(self._search_regex(
page, 'view count', fatal=False)) r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
webpage, 'view count', default=None))
description = self._html_search_meta(
'description', webpage, default=None) or clean_html(
oembed_obj.get('description'))
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': oembed_obj['title'], 'title': title,
'thumbnail': thumb_url, 'thumbnail': thumb_url,
'description': self._html_search_meta('description', page), 'description': description,
'creator': oembed_obj['author_name'], 'creator': oembed_obj.get('author_name'),
'duration': oembed_obj['duration'], 'duration': parse_duration(oembed_obj.get('duration')),
'timestamp': timestamp, 'upload_date': upload_date,
'view_count': view_count, 'view_count': view_count,
} }

View File

@ -4,9 +4,11 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
js_to_json, js_to_json,
smuggle_url, smuggle_url,
try_get,
) )
@ -27,7 +29,20 @@ class CBCIE(InfoExtractor):
}, },
'skip': 'Geo-restricted to Canada', 'skip': 'Geo-restricted to Canada',
}, { }, {
# with clipId # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
'md5': '162adfa070274b144f4fdc3c3b8207db',
'info_dict': {
'id': '2414435309',
'ext': 'mp4',
'title': '22 Minutes Update: What Not To Wear Quebec',
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
'upload_date': '20131025',
'uploader': 'CBCC-NEW',
'timestamp': 1382717907,
},
}, {
# with clipId, feed only available via tpfeed.cbc.ca
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
'md5': '0274a90b51a9b4971fe005c63f592f12', 'md5': '0274a90b51a9b4971fe005c63f592f12',
'info_dict': { 'info_dict': {
@ -83,9 +98,15 @@ class CBCIE(InfoExtractor):
media_id = player_info.get('mediaId') media_id = player_info.get('mediaId')
if not media_id: if not media_id:
clip_id = player_info['clipId'] clip_id = player_info['clipId']
media_id = self._download_json( feed = self._download_json(
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
clip_id)['entries'][0]['id'].split('/')[-1] clip_id, fatal=False)
if feed:
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
if not media_id:
media_id = self._download_json(
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
clip_id)['entries'][0]['id'].split('/')[-1]
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
else: else:
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)] entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .mtv import MTVIE from .mtv import MTVIE
from ..utils import ExtractorError
class CMTIE(MTVIE): class CMTIE(MTVIE):
@ -16,7 +18,27 @@ class CMTIE(MTVIE):
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"', 'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
'description': 'Blame It All On My Roots', 'description': 'Blame It All On My Roots',
}, },
'skip': 'Video not available',
}, {
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
'info_dict': {
'id': '1504699',
'ext': 'mp4',
'title': 'Still The King Ep. 109 in 3 Minutes',
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
'timestamp': 1469421000.0,
'upload_date': '20160725',
},
}, { }, {
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def _transform_rtmp_url(cls, rtmp_video_url):
if 'error_not_available.swf' in rtmp_video_url:
raise ExtractorError(
'%s said: video is not available' % cls.IE_NAME, expected=True)
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)

View File

@ -1,17 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
float_or_none,
unified_strdate,
)
class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralIE(MTVServicesInfoExtractor):
@ -26,8 +15,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
'info_dict': { 'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'ext': 'mp4', 'ext': 'mp4',
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother', 'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
'description': 'After a certain point, breastfeeding becomes c**kblocking.', 'description': 'After a certain point, breastfeeding becomes c**kblocking.',
'timestamp': 1376798400,
'upload_date': '20130818',
}, },
}, { }, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview', 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
@ -35,244 +26,43 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
}] }]
class ComedyCentralShowsIE(MTVServicesInfoExtractor): class ToshIE(MTVServicesInfoExtractor):
IE_DESC = 'The Daily Show / The Colbert Report' IE_DESC = 'Tosh.0'
# urls can be abbreviations like :thedailyshow _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
# urls for episodes like: _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip>
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)|
(?P<interview>
extended-interviews/(?P<interID>[0-9a-z]+)/
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
(?:/[^/?#]?|[?#]|$))))
'''
_TESTS = [{ _TESTS = [{
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
'info_dict': {
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
'ext': 'mp4',
'upload_date': '20121213',
'description': 'Kristen Stewart learns to let loose in "On the Road."',
'uploader': 'thedailyshow',
'title': 'thedailyshow kristen-stewart part 1',
}
}, {
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
'info_dict': {
'id': 'sarah-chayes-extended-interview',
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
'title': 'thedailyshow Sarah Chayes Extended Interview',
},
'playlist': [
{
'info_dict': {
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
'ext': 'mp4',
'upload_date': '20150129',
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
'uploader': 'thedailyshow',
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
},
},
{
'info_dict': {
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
'ext': 'mp4',
'upload_date': '20150129',
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
'uploader': 'thedailyshow',
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
},
},
],
'params': {
'skip_download': True,
},
}, {
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
'only_matching': True,
}, {
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
'only_matching': True,
}, {
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
'only_matching': True,
}, {
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
'only_matching': True,
}, {
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans', 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
'info_dict': {
'description': 'Tosh asked fans to share their summer plans.',
'title': 'Twitter Users Share Summer Plans',
},
'playlist': [{
'md5': 'f269e88114c1805bb6d7653fecea9e06',
'info_dict': {
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4',
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
'description': 'Tosh asked fans to share their summer plans.',
'thumbnail': 're:^https?://.*\.jpg',
# It's really reported to be published on year 2077
'upload_date': '20770610',
'timestamp': 3390510600,
'subtitles': {
'en': 'mincount:3',
},
},
}]
}, {
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
'only_matching': True, 'only_matching': True,
}] }]
_available_formats = ['3500', '2200', '1700', '1200', '750', '400'] @classmethod
def _transform_rtmp_url(cls, rtmp_video_url):
_video_extensions = { new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
'3500': 'mp4', new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
'2200': 'mp4', return new_urls
'1700': 'mp4',
'1200': 'mp4',
'750': 'mp4',
'400': 'mp4',
}
_video_dimensions = {
'3500': (1280, 720),
'2200': (960, 540),
'1700': (768, 432),
'1200': (640, 360),
'750': (512, 288),
'400': (384, 216),
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('shortname'):
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
if mobj.group('clip'):
if mobj.group('videotitle'):
epTitle = mobj.group('videotitle')
elif mobj.group('showname') == 'thedailyshow':
epTitle = mobj.group('tdstitle')
else:
epTitle = mobj.group('cntitle')
dlNewest = False
elif mobj.group('interview'):
epTitle = mobj.group('interview_title')
dlNewest = False
else:
dlNewest = not mobj.group('episode')
if dlNewest:
epTitle = mobj.group('showname')
else:
epTitle = mobj.group('episode')
show_name = mobj.group('showname')
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
if dlNewest:
url = htmlHandle.geturl()
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
raise ExtractorError('Invalid redirected URL: ' + url)
if mobj.group('episode') == '':
raise ExtractorError('Redirected URL is still not specific: ' + url)
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
if len(altMovieParams) == 0:
raise ExtractorError('unable to find Flash URL in webpage ' + url)
else:
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
uri = mMovieParams[0][1]
# Correct cc.com in uri
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
idoc = self._download_xml(
index_url, epTitle,
'Downloading show index', 'Unable to download episode index')
title = idoc.find('./channel/title').text
description = idoc.find('./channel/description').text
entries = []
item_els = idoc.findall('.//item')
for part_num, itemEl in enumerate(item_els):
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
duration = float_or_none(content.attrib.get('duration'))
mediagen_url = content.attrib['url']
guid = itemEl.find('./guid').text.rpartition(':')[-1]
cdoc = self._download_xml(
mediagen_url, epTitle,
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
turls = []
for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
turls.append(finfo)
formats = []
for format, rtmp_video_url in turls:
w, h = self._video_dimensions.get(format, (None, None))
formats.append({
'format_id': 'vhttp-%s' % format,
'url': self._transform_rtmp_url(rtmp_video_url),
'ext': self._video_extensions.get(format, 'mp4'),
'height': h,
'width': w,
})
formats.append({
'format_id': 'rtmp-%s' % format,
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
'ext': self._video_extensions.get(format, 'mp4'),
'height': h,
'width': w,
})
self._sort_formats(formats)
subtitles = self._extract_subtitles(cdoc, guid)
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
'title': virtual_id,
'formats': formats,
'uploader': show_name,
'upload_date': upload_date,
'duration': duration,
'thumbnail': thumbnail,
'description': description,
'subtitles': subtitles,
})
return {
'_type': 'playlist',
'id': epTitle,
'entries': entries,
'title': show_name + ' ' + title,
'description': description,
}
class ComedyCentralTVIE(MTVServicesInfoExtractor): class ComedyCentralTVIE(MTVServicesInfoExtractor):

View File

@ -1481,6 +1481,13 @@ class InfoExtractor(object):
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
"""
Parse formats from MPD manifest.
References:
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
"""
if mpd_doc.get('type') == 'dynamic': if mpd_doc.get('type') == 'dynamic':
return [] return []
@ -1513,8 +1520,16 @@ class InfoExtractor(object):
s_e = segment_timeline.findall(_add_ns('S')) s_e = segment_timeline.findall(_add_ns('S'))
if s_e: if s_e:
ms_info['total_number'] = 0 ms_info['total_number'] = 0
ms_info['s'] = []
for s in s_e: for s in s_e:
ms_info['total_number'] += 1 + int(s.get('r', '0')) r = int(s.get('r', 0))
ms_info['total_number'] += 1 + r
ms_info['s'].append({
't': int(s.get('t', 0)),
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
'd': int(s.attrib['d']),
'r': r,
})
else: else:
timescale = segment_template.get('timescale') timescale = segment_template.get('timescale')
if timescale: if timescale:
@ -1551,7 +1566,7 @@ class InfoExtractor(object):
continue continue
representation_attrib = adaptation_set.attrib.copy() representation_attrib = adaptation_set.attrib.copy()
representation_attrib.update(representation.attrib) representation_attrib.update(representation.attrib)
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
mime_type = representation_attrib['mimeType'] mime_type = representation_attrib['mimeType']
content_type = mime_type.split('/')[0] content_type = mime_type.split('/')[0]
if content_type == 'text': if content_type == 'text':
@ -1595,16 +1610,40 @@ class InfoExtractor(object):
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
media_template = representation_ms_info['media_template'] media_template = representation_ms_info['media_template']
media_template = media_template.replace('$RepresentationID$', representation_id) media_template = media_template.replace('$RepresentationID$', representation_id)
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template) media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
media_template.replace('$$', '$') media_template.replace('$$', '$')
representation_ms_info['segment_urls'] = [
media_template % { # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
'Number': segment_number, # can't be used at the same time
'Bandwidth': representation_attrib.get('bandwidth')} if '%(Number' in media_template:
for segment_number in range( representation_ms_info['segment_urls'] = [
representation_ms_info['start_number'], media_template % {
representation_ms_info['total_number'] + representation_ms_info['start_number'])] 'Number': segment_number,
'Bandwidth': representation_attrib.get('bandwidth'),
}
for segment_number in range(
representation_ms_info['start_number'],
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
else:
representation_ms_info['segment_urls'] = []
segment_time = 0
def add_segment_url():
representation_ms_info['segment_urls'].append(
media_template % {
'Time': segment_time,
'Bandwidth': representation_attrib.get('bandwidth'),
}
)
for num, s in enumerate(representation_ms_info['s']):
segment_time = s.get('t') or segment_time
add_segment_url()
for r in range(s.get('r', 0)):
segment_time += s['d']
add_segment_url()
segment_time += s['d']
if 'segment_urls' in representation_ms_info: if 'segment_urls' in representation_ms_info:
f.update({ f.update({
'segment_urls': representation_ms_info['segment_urls'], 'segment_urls': representation_ms_info['segment_urls'],
@ -1747,7 +1786,7 @@ class InfoExtractor(object):
any_restricted = False any_restricted = False
for tc in self.get_testcases(include_onlymatching=False): for tc in self.get_testcases(include_onlymatching=False):
if 'playlist' in tc: if tc.get('playlist', []):
tc = tc['playlist'][0] tc = tc['playlist'][0]
is_restricted = age_restricted( is_restricted = age_restricted(
tc.get('info_dict', {}).get('age_limit'), age_limit) tc.get('info_dict', {}).get('age_limit'), age_limit)

View File

@ -5,19 +5,20 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
determine_protocol, determine_protocol,
unescapeHTML,
) )
class DailyMailIE(InfoExtractor): class DailyMailIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html', 'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
'md5': '2f639d446394f53f3a33658b518b6615', 'md5': 'f6129624562251f628296c3a9ffde124',
'info_dict': { 'info_dict': {
'id': '1288527', 'id': '1295863',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Turn any video into an impressionist masterpiece', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
'description': 'md5:88ddbcb504367987b2708bb38677c9d2', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
} }
} }
@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_data = self._parse_json(self._search_regex( video_data = self._parse_json(self._search_regex(
r"data-opts='({.+?})'", webpage, 'video data'), video_id) r"data-opts='({.+?})'", webpage, 'video data'), video_id)
title = video_data['title'] title = unescapeHTML(video_data['title'])
video_sources = self._download_json(video_data.get( video_sources = self._download_json(video_data.get(
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id) 'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': video_data.get('descr'), 'description': unescapeHTML(video_data.get('descr')),
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'), 'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
'formats': formats, 'formats': formats,
} }

View File

@ -62,11 +62,9 @@ class DCNBaseIE(InfoExtractor):
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
r'<a[^>]+href="rtsp(://[^"]+)"' r'<a[^>]+href="rtsp(://[^"]+)"'
], webpage, 'format url') ], webpage, 'format url')
# TODO: Current DASH formats are broken - $Time$ pattern in formats.extend(self._extract_mpd_formats(
# <SegmentTemplate> not implemented yet format_url_base + '/manifest.mpd',
# formats.extend(self._extract_mpd_formats( video_id, mpd_id='dash', fatal=False))
# format_url_base + '/manifest.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url_base + '/playlist.m3u8', video_id, 'mp4', format_url_base + '/playlist.m3u8', video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False)) m3u8_entry_protocol, m3u8_id='hls', fatal=False))

View File

@ -4,19 +4,23 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
encode_base_n,
ExtractorError,
int_or_none,
parse_duration, parse_duration,
str_to_int, str_to_int,
) )
class EpornerIE(InfoExtractor): class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8', 'md5': '39d486f046212d8e1b911c52ab4691f8',
'info_dict': { 'info_dict': {
'id': '95008', 'id': 'qlDUmNsj6VS',
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Infamous Tiffany Teen Strip Tease Video', 'title': 'Infamous Tiffany Teen Strip Tease Video',
@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
# New (May 2016) URL layout # New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id) webpage, urlh = self._download_webpage_handle(url, display_id)
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
redirect_url = 'http://www.eporner.com/config5/%s' % video_id video_id = self._match_id(compat_str(urlh.geturl()))
player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config')
sources = self._search_regex( hash = self._search_regex(
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources') r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'<title>(.+?) - EPORNER', webpage, 'title')
# Reverse engineered from vjs.js
def calc_hash(s):
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
video = self._download_json(
'http://www.eporner.com/xhr/video/%s' % video_id,
display_id, note='Downloading video JSON',
query={
'hash': calc_hash(hash),
'device': 'generic',
'domain': 'www.eporner.com',
'fallback': 'false',
})
if video.get('available') is False:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
sources = video['sources']
formats = [] formats = []
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources): for kind, formats_dict in sources.items():
fmt = { if not isinstance(formats_dict, dict):
'url': video_url, continue
'format_id': format_id, for format_id, format_dict in formats_dict.items():
} if not isinstance(format_dict, dict):
m = re.search(r'^(\d+)', format_id) continue
if m: src = format_dict.get('src')
fmt['height'] = int(m.group(1)) if not isinstance(src, compat_str) or not src.startswith('http'):
formats.append(fmt) continue
if kind == 'hls':
formats.extend(self._extract_m3u8_formats(
src, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=kind, fatal=False))
else:
height = int_or_none(self._search_regex(
r'(\d+)[pP]', format_id, 'height', default=None))
fps = int_or_none(self._search_regex(
r'(\d+)fps', format_id, 'fps', default=None))
formats.append({
'url': src,
'format_id': format_id,
'height': height,
'fps': fps,
})
self._sort_formats(formats) self._sort_formats(formats)
duration = parse_duration(self._html_search_meta('duration', webpage)) duration = parse_duration(self._html_search_meta('duration', webpage))

View File

@ -44,6 +44,7 @@ from .appletrailers import (
AppleTrailersSectionIE, AppleTrailersSectionIE,
) )
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from .arkena import ArkenaIE
from .ard import ( from .ard import (
ARDIE, ARDIE,
ARDMediathekIE, ARDMediathekIE,
@ -158,8 +159,8 @@ from .coub import CoubIE
from .collegerama import CollegeRamaIE from .collegerama import CollegeRamaIE
from .comedycentral import ( from .comedycentral import (
ComedyCentralIE, ComedyCentralIE,
ComedyCentralShowsIE,
ComedyCentralTVIE, ComedyCentralTVIE,
ToshIE,
) )
from .comcarcoff import ComCarCoffIE from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
@ -397,6 +398,10 @@ from .kuwo import (
) )
from .la7 import LA7IE from .la7 import LA7IE
from .laola1tv import Laola1TvIE from .laola1tv import Laola1TvIE
from .lcp import (
LcpPlayIE,
LcpIE,
)
from .learnr import LearnrIE from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE from .lecture2go import Lecture2GoIE
from .lemonde import LemondeIE from .lemonde import LemondeIE
@ -475,7 +480,6 @@ from .msn import MSNIE
from .mtv import ( from .mtv import (
MTVIE, MTVIE,
MTVServicesEmbeddedIE, MTVServicesEmbeddedIE,
MTVIggyIE,
MTVDEIE, MTVDEIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
@ -525,7 +529,6 @@ from .nextmedia import (
NextMediaActionNewsIE, NextMediaActionNewsIE,
AppleDailyIE, AppleDailyIE,
) )
from .nextmovie import NextMovieIE
from .nfb import NFBIE from .nfb import NFBIE
from .nfl import NFLIE from .nfl import NFLIE
from .nhl import ( from .nhl import (

View File

@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
https?:// https?://
(?:\w+\.)?facebook\.com/ (?:[\w-]+\.)?facebook\.com/
(?:[^#]*?\#!/)? (?:[^#]*?\#!/)?
(?: (?:
(?: (?:
@ -127,6 +127,9 @@ class FacebookIE(InfoExtractor):
}, { }, {
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
'only_matching': True,
}] }]
@staticmethod @staticmethod

View File

@ -62,6 +62,7 @@ from .videomore import VideomoreIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .arkena import ArkenaIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
@ -70,6 +71,7 @@ from .vessel import VesselIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -473,7 +475,7 @@ class GenericIE(InfoExtractor):
'url': 'http://www.vestifinance.ru/articles/25753', 'url': 'http://www.vestifinance.ru/articles/25753',
'info_dict': { 'info_dict': {
'id': '25753', 'id': '25753',
'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
@ -640,6 +642,8 @@ class GenericIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored', 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
'description': 'Two valets share their love for movie star Liam Neesons.', 'description': 'Two valets share their love for movie star Liam Neesons.',
'timestamp': 1349922600,
'upload_date': '20121011',
}, },
}, },
# YouTube embed via <data-embed-url=""> # YouTube embed via <data-embed-url="">
@ -781,6 +785,15 @@ class GenericIE(InfoExtractor):
'upload_date': '20141029', 'upload_date': '20141029',
} }
}, },
# Soundcloud multiple embeds
{
'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
'info_dict': {
'id': '52809',
'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
},
'playlist_mincount': 7,
},
# Livestream embed # Livestream embed
{ {
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast', 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@ -856,6 +869,7 @@ class GenericIE(InfoExtractor):
'description': 'md5:601cb790edd05908957dae8aaa866465', 'description': 'md5:601cb790edd05908957dae8aaa866465',
'upload_date': '20150220', 'upload_date': '20150220',
}, },
'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
}, },
# jwplayer YouTube # jwplayer YouTube
{ {
@ -1342,6 +1356,23 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Vimeo'], 'add_ie': ['Vimeo'],
}, },
{
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
'info_dict': {
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': 'Royalty free test video',
'timestamp': 1432816365,
'upload_date': '20150528',
'is_live': False,
},
'params': {
'skip_download': True,
},
'add_ie': [ArkenaIE.ie_key()],
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -1978,12 +2009,9 @@ class GenericIE(InfoExtractor):
return self.url_result(myvi_url) return self.url_result(myvi_url)
# Look for embedded soundcloud player # Look for embedded soundcloud player
mobj = re.search( soundcloud_urls = SoundcloudIE._extract_urls(webpage)
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"', if soundcloud_urls:
webpage) return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
if mobj is not None:
url = unescapeHTML(mobj.group('url'))
return self.url_result(url)
# Look for embedded mtvservices player # Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@ -2146,6 +2174,11 @@ class GenericIE(InfoExtractor):
if digiteka_url: if digiteka_url:
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key()) return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
# Look for Arkena embeds
arkena_url = ArkenaIE._extract_url(webpage)
if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key())
# Look for Limelight embeds # Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj: if mobj:

View File

@ -36,7 +36,6 @@ class InstagramIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'BA-pQFBG8HZ', 'id': 'BA-pQFBG8HZ',
'ext': 'mp4', 'ext': 'mp4',
'uploader_id': 'britneyspears',
'title': 'Video by britneyspears', 'title': 'Video by britneyspears',
'thumbnail': 're:^https?://.*\.jpg', 'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1453760977, 'timestamp': 1453760977,

View File

@ -0,0 +1,90 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .arkena import ArkenaIE
class LcpPlayIE(ArkenaIE):
_VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
_TESTS = [{
'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
'md5': 'b8bd9298542929c06c1c15788b1f277a',
'info_dict': {
'id': '327336',
'ext': 'mp4',
'title': '327336',
'timestamp': 1456391602,
'upload_date': '20160225',
},
'params': {
'skip_download': True,
},
}]
class LcpIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
# arkena embed
'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
'md5': 'b8bd9298542929c06c1c15788b1f277a',
'info_dict': {
'id': 'd56d03e9',
'ext': 'mp4',
'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
'timestamp': 1456488895,
'upload_date': '20160226',
},
'params': {
'skip_download': True,
},
}, {
# dailymotion live stream
'url': 'http://www.lcp.fr/le-direct',
'info_dict': {
'id': 'xji3qy',
'ext': 'mp4',
'title': 'La Chaine Parlementaire (LCP), Live TNT',
'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
'uploader': 'LCP',
'uploader_id': 'xbz33d',
'timestamp': 1308923058,
'upload_date': '20110624',
},
'params': {
# m3u8 live stream
'skip_download': True,
},
}, {
'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
play_url = self._search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
webpage, 'play iframe', default=None, group='url')
if not play_url:
return self.url_result(url, 'Generic')
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, fatal=True)
description = self._html_search_meta(
('description', 'twitter:description'), webpage)
return {
'_type': 'url_transparent',
'ie_key': LcpPlayIE.ie_key(),
'url': play_url,
'display_id': display_id,
'title': title,
'description': description,
}

View File

@ -9,7 +9,7 @@ class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV' IE_DESC = '芒果TV'
_TEST = { _TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
'md5': '1bdadcf760a0b90946ca68ee9a2db41a', 'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
'info_dict': { 'info_dict': {
@ -20,7 +20,11 @@ class MGTVIE(InfoExtractor):
'duration': 7461, 'duration': 7461,
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
} }, {
# no tbr extracted from stream_url
'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -41,7 +45,8 @@ class MGTVIE(InfoExtractor):
def extract_format(stream_url, format_id, idx, query={}): def extract_format(stream_url, format_id, idx, query={}):
format_info = self._download_json( format_info = self._download_json(
stream_url, video_id, stream_url, video_id,
note='Download video info for format %s' % format_id or '#%d' % idx, query=query) note='Download video info for format %s' % (format_id or '#%d' % idx),
query=query)
return { return {
'format_id': format_id, 'format_id': format_id,
'url': format_info['info'], 'url': format_info['info'],

View File

@ -16,6 +16,7 @@ from ..utils import (
HEADRequest, HEADRequest,
sanitized_Request, sanitized_Request,
strip_or_none, strip_or_none,
timeconvert,
unescapeHTML, unescapeHTML,
url_basename, url_basename,
RegexNotFoundError, RegexNotFoundError,
@ -36,13 +37,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
return uri.split(':')[-1] return uri.split(':')[-1]
# This was originally implemented for ComedyCentral, but it also works here # This was originally implemented for ComedyCentral, but it also works here
@staticmethod @classmethod
def _transform_rtmp_url(rtmp_video_url): def _transform_rtmp_url(cls, rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m: if not m:
return rtmp_video_url return {'rtmp': rtmp_video_url}
base = 'http://viacommtvstrmfs.fplive.net/' base = 'http://viacommtvstrmfs.fplive.net/'
return base + m.group('finalid') return {'http': base + m.group('finalid')}
def _get_feed_url(self, uri): def _get_feed_url(self, uri):
return self._FEED_URL return self._FEED_URL
@ -86,14 +87,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
rtmp_video_url = rendition.find('./src').text rtmp_video_url = rendition.find('./src').text
if rtmp_video_url.endswith('siteunavail.png'): if rtmp_video_url.endswith('siteunavail.png'):
continue continue
new_url = self._transform_rtmp_url(rtmp_video_url) new_urls = self._transform_rtmp_url(rtmp_video_url)
formats.append({ formats.extend([{
'ext': 'flv' if new_url.startswith('rtmp') else ext, 'ext': 'flv' if new_url.startswith('rtmp') else ext,
'url': new_url, 'url': new_url,
'format_id': rendition.get('bitrate'), 'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
'width': int(rendition.get('width')), 'width': int(rendition.get('width')),
'height': int(rendition.get('height')), 'height': int(rendition.get('height')),
}) } for kind, new_url in new_urls.items()])
except (KeyError, TypeError): except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.') raise ExtractorError('Invalid rendition field.')
self._sort_formats(formats) self._sort_formats(formats)
@ -136,6 +137,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
description = strip_or_none(xpath_text(itemdoc, 'description')) description = strip_or_none(xpath_text(itemdoc, 'description'))
timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
title_el = None title_el = None
if title_el is None: if title_el is None:
title_el = find_xpath_attr( title_el = find_xpath_attr(
@ -168,6 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description, 'description': description,
'duration': float_or_none(content_el.attrib.get('duration')), 'duration': float_or_none(content_el.attrib.get('duration')),
'timestamp': timestamp,
} }
def _get_feed_query(self, uri): def _get_feed_query(self, uri):
@ -186,8 +190,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
idoc = self._download_xml( idoc = self._download_xml(
url, video_id, url, video_id,
'Downloading info', transform_source=fix_xml_ampersands) 'Downloading info', transform_source=fix_xml_ampersands)
title = xpath_text(idoc, './channel/title')
description = xpath_text(idoc, './channel/description')
return self.playlist_result( return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')]) [self._get_video_info(item) for item in idoc.findall('.//item')],
playlist_title=title, playlist_description=description)
def _extract_mgid(self, webpage): def _extract_mgid(self, webpage):
try: try:
@ -233,6 +242,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds', 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.', 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
'timestamp': 1400126400,
'upload_date': '20140515',
}, },
} }
@ -275,6 +286,8 @@ class MTVIE(MTVServicesInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
'timestamp': 1352610000,
'upload_date': '20121111',
}, },
}, },
] ]
@ -301,20 +314,6 @@ class MTVIE(MTVServicesInfoExtractor):
return self._get_videos_info(uri) return self._get_videos_info(uri)
class MTVIggyIE(MTVServicesInfoExtractor):
IE_NAME = 'mtviggy.com'
_VALID_URL = r'https?://www\.mtviggy\.com/videos/.+'
_TEST = {
'url': 'http://www.mtviggy.com/videos/arcade-fire-behind-the-scenes-at-the-biggest-music-experiment-yet/',
'info_dict': {
'id': '984696',
'ext': 'mp4',
'title': 'Arcade Fire: Behind the Scenes at the Biggest Music Experiment Yet',
}
}
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
class MTVDEIE(MTVServicesInfoExtractor): class MTVDEIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv.de' IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$' _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
@ -322,7 +321,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
'info_dict': { 'info_dict': {
'id': 'music_video-a50bc5f0b3aa4b3190aa', 'id': 'music_video-a50bc5f0b3aa4b3190aa',
'ext': 'mp4', 'ext': 'flv',
'title': 'MusicVideo_cro-traum', 'title': 'MusicVideo_cro-traum',
'description': 'Cro - Traum', 'description': 'Cro - Traum',
}, },
@ -330,20 +329,21 @@ class MTVDEIE(MTVServicesInfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Blocked at Travis CI',
}, { }, {
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97) # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen', 'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
'info_dict': { 'info_dict': {
'id': 'local_playlist-f5ae778b9832cc837189', 'id': 'local_playlist-f5ae778b9832cc837189',
'ext': 'mp4', 'ext': 'flv',
'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1', 'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
}, },
'params': { 'params': {
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Blocked at Travis CI',
}, { }, {
# single video in pagePlaylist with different id
'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3', 'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
'info_dict': { 'info_dict': {
'id': 'local_playlist-4e760566473c4c8c5344', 'id': 'local_playlist-4e760566473c4c8c5344',
@ -355,6 +355,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -367,11 +368,14 @@ class MTVDEIE(MTVServicesInfoExtractor):
r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'), r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
video_id) video_id)
def _mrss_url(item):
return item['mrss'] + item.get('mrssvars', '')
# news pages contain single video in playlist with different id # news pages contain single video in playlist with different id
if len(playlist) == 1: if len(playlist) == 1:
return self._get_videos_info_from_url(playlist[0]['mrss'], video_id) return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
for item in playlist: for item in playlist:
item_id = item.get('id') item_id = item.get('id')
if item_id and compat_str(item_id) == video_id: if item_id and compat_str(item_id) == video_id:
return self._get_videos_info_from_url(item['mrss'], video_id) return self._get_videos_info_from_url(_mrss_url(item), video_id)

View File

@ -1,30 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
from ..compat import compat_urllib_parse_urlencode
class NextMovieIE(MTVServicesInfoExtractor):
IE_NAME = 'nextmovie.com'
_VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
_FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm'
_TESTS = [{
'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/',
'md5': '09a9199f2f11f10107d04fcb153218aa',
'info_dict': {
'id': '961726',
'ext': 'mp4',
'title': 'The Muppets\' Gravity',
},
}]
def _get_feed_query(self, uri):
return compat_urllib_parse_urlencode({
'feed': '1505',
'mgid': uri,
})
def _real_extract(self, url):
mgid = self._match_id(url)
return self._get_videos_info(mgid)

View File

@ -7,6 +7,7 @@ from ..utils import update_url_query
class NickIE(MTVServicesInfoExtractor): class NickIE(MTVServicesInfoExtractor):
# None of videos on the website are still alive?
IE_NAME = 'nick.com' IE_NAME = 'nick.com'
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)' _VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'

View File

@ -59,11 +59,8 @@ class OnetBaseIE(InfoExtractor):
# TODO: Support Microsoft Smooth Streaming # TODO: Support Microsoft Smooth Streaming
continue continue
elif ext == 'mpd': elif ext == 'mpd':
# TODO: Current DASH formats are broken - $Time$ pattern in formats.extend(self._extract_mpd_formats(
# <SegmentTemplate> not implemented yet video_url, video_id, mpd_id='dash', fatal=False))
# formats.extend(self._extract_mpd_formats(
# video_url, video_id, mpd_id='dash', fatal=False))
continue
else: else:
formats.append({ formats.append({
'url': video_url, 'url': video_url,

View File

@ -137,13 +137,16 @@ class ORFTVthekIE(InfoExtractor):
class ORFOE1IE(InfoExtractor): class ORFOE1IE(InfoExtractor):
IE_NAME = 'orf:oe1' IE_NAME = 'orf:oe1'
IE_DESC = 'Radio Österreich 1' IE_DESC = 'Radio Österreich 1'
_VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)' _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)'
# Audios on ORF radio are only available for 7 days, so we can't add tests. # Audios on ORF radio are only available for 7 days, so we can't add tests.
_TEST = { _TESTS = [{
'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211', 'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
'only_matching': True, 'only_matching': True,
} }, {
'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)

View File

@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor):
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex( error_msg = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>', r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
webpage, 'error message', default=None, group='error') webpage, 'error message', default=None, group='error')
if error_msg: if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg) error_msg = re.sub(r'\s+', ' ', error_msg)

View File

@ -6,7 +6,6 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
sanitized_Request,
urlencode_postdata, urlencode_postdata,
) )
@ -37,28 +36,33 @@ class SharedIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
if '>File does not exist<' in webpage: if '>File does not exist<' in webpage:
raise ExtractorError( raise ExtractorError(
'Video %s does not exist' % video_id, expected=True) 'Video %s does not exist' % video_id, expected=True)
download_form = self._hidden_inputs(webpage) download_form = self._hidden_inputs(webpage)
request = sanitized_Request(
url, urlencode_postdata(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
video_page = self._download_webpage( video_page = self._download_webpage(
request, video_id, 'Downloading video page') urlh.geturl(), video_id, 'Downloading video page',
data=urlencode_postdata(download_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': urlh.geturl(),
})
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'data-url="([^"]+)"', video_page, 'video URL') r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
video_page, 'video URL', group='url')
title = base64.b64decode(self._html_search_meta( title = base64.b64decode(self._html_search_meta(
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
filesize = int_or_none(self._html_search_meta( filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False)) 'full:size', webpage, 'file size', fatal=False))
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None) r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
video_page, 'thumbnail', default=None, group='url')
return { return {
'id': video_id, 'id': video_id,

View File

@ -13,20 +13,21 @@ from ..utils import (
sanitized_Request, sanitized_Request,
unified_strdate, unified_strdate,
urlencode_postdata, urlencode_postdata,
xpath_text,
) )
class SmotriIE(InfoExtractor): class SmotriIE(InfoExtractor):
IE_DESC = 'Smotri.com' IE_DESC = 'Smotri.com'
IE_NAME = 'smotri' IE_NAME = 'smotri'
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' _VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_NETRC_MACHINE = 'smotri' _NETRC_MACHINE = 'smotri'
_TESTS = [ _TESTS = [
# real video id 2610366 # real video id 2610366
{ {
'url': 'http://smotri.com/video/view/?id=v261036632ab', 'url': 'http://smotri.com/video/view/?id=v261036632ab',
'md5': '2a7b08249e6f5636557579c368040eb9', 'md5': '02c0dfab2102984e9c5bb585cc7cc321',
'info_dict': { 'info_dict': {
'id': 'v261036632ab', 'id': 'v261036632ab',
'ext': 'mp4', 'ext': 'mp4',
@ -174,11 +175,11 @@ class SmotriIE(InfoExtractor):
if video_password: if video_password:
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
request = sanitized_Request( video = self._download_json(
'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form)) 'http://smotri.com/video/view/url/bot/',
request.add_header('Content-Type', 'application/x-www-form-urlencoded') video_id, 'Downloading video JSON',
data=urlencode_postdata(video_form),
video = self._download_json(request, video_id, 'Downloading video JSON') headers={'Content-Type': 'application/x-www-form-urlencoded'})
video_url = video.get('_vidURL') or video.get('_vidURL_mp4') video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
@ -196,11 +197,11 @@ class SmotriIE(InfoExtractor):
raise ExtractorError(msg, expected=True) raise ExtractorError(msg, expected=True)
title = video['title'] title = video['title']
thumbnail = video['_imgURL'] thumbnail = video.get('_imgURL')
upload_date = unified_strdate(video['added']) upload_date = unified_strdate(video.get('added'))
uploader = video['userNick'] uploader = video.get('userNick')
uploader_id = video['userLogin'] uploader_id = video.get('userLogin')
duration = int_or_none(video['duration']) duration = int_or_none(video.get('duration'))
# Video JSON does not provide enough meta data # Video JSON does not provide enough meta data
# We will extract some from the video web page instead # We will extract some from the video web page instead
@ -209,7 +210,7 @@ class SmotriIE(InfoExtractor):
# Warning if video is unavailable # Warning if video is unavailable
warning = self._html_search_regex( warning = self._html_search_regex(
r'<div class="videoUnModer">(.*?)</div>', webpage, r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
'warning message', default=None) 'warning message', default=None)
if warning is not None: if warning is not None:
self._downloader.report_warning( self._downloader.report_warning(
@ -217,20 +218,22 @@ class SmotriIE(InfoExtractor):
(video_id, warning)) (video_id, warning))
# Adult content # Adult content
if re.search('EroConfirmText">', webpage) is not None: if 'EroConfirmText">' in webpage:
self.report_age_confirmation() self.report_age_confirmation()
confirm_string = self._html_search_regex( confirm_string = self._html_search_regex(
r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id, r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
webpage, 'confirm string') webpage, 'confirm string')
confirm_url = webpage_url + '&confirm=%s' % confirm_string confirm_url = webpage_url + '&confirm=%s' % confirm_string
webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)') webpage = self._download_webpage(
confirm_url, video_id,
'Downloading video page (age confirmed)')
adult_content = True adult_content = True
else: else:
adult_content = False adult_content = False
view_count = self._html_search_regex( view_count = self._html_search_regex(
'Общее количество просмотров.*?<span class="Number">(\\d+)</span>', r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL) webpage, 'view count', fatal=False)
return { return {
'id': video_id, 'id': video_id,
@ -249,37 +252,33 @@ class SmotriIE(InfoExtractor):
class SmotriCommunityIE(InfoExtractor): class SmotriCommunityIE(InfoExtractor):
IE_DESC = 'Smotri.com community videos' IE_DESC = 'Smotri.com community videos'
IE_NAME = 'smotri:community' IE_NAME = 'smotri:community'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)' _VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
_TEST = { _TEST = {
'url': 'http://smotri.com/community/video/kommuna', 'url': 'http://smotri.com/community/video/kommuna',
'info_dict': { 'info_dict': {
'id': 'kommuna', 'id': 'kommuna',
'title': 'КПРФ',
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) community_id = self._match_id(url)
community_id = mobj.group('communityid')
url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id rss = self._download_xml(
rss = self._download_xml(url, community_id, 'Downloading community RSS') 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
community_id, 'Downloading community RSS')
entries = [self.url_result(video_url.text, 'Smotri') entries = [
for video_url in rss.findall('./channel/item/link')] self.url_result(video_url.text, SmotriIE.ie_key())
for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text return self.playlist_result(entries, community_id)
community_title = self._html_search_regex(
'^Видео сообщества "([^"]+)"$', description_text, 'community title')
return self.playlist_result(entries, community_id, community_title)
class SmotriUserIE(InfoExtractor): class SmotriUserIE(InfoExtractor):
IE_DESC = 'Smotri.com user videos' IE_DESC = 'Smotri.com user videos'
IE_NAME = 'smotri:user' IE_NAME = 'smotri:user'
_VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)' _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://smotri.com/user/inspector', 'url': 'http://smotri.com/user/inspector',
'info_dict': { 'info_dict': {
@ -290,19 +289,19 @@ class SmotriUserIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) user_id = self._match_id(url)
user_id = mobj.group('userid')
url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id rss = self._download_xml(
rss = self._download_xml(url, user_id, 'Downloading user RSS') 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
user_id, 'Downloading user RSS')
entries = [self.url_result(video_url.text, 'Smotri') entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')] for video_url in rss.findall('./channel/item/link')]
description_text = rss.find('./channel/description').text description_text = xpath_text(rss, './channel/description') or ''
user_nickname = self._html_search_regex( user_nickname = self._search_regex(
'^Видео режиссера (.*)$', description_text, '^Видео режиссера (.+)$', description_text,
'user nickname') 'user nickname', fatal=False)
return self.playlist_result(entries, user_id, user_nickname) return self.playlist_result(entries, user_id, user_nickname)
@ -310,11 +309,11 @@ class SmotriUserIE(InfoExtractor):
class SmotriBroadcastIE(InfoExtractor): class SmotriBroadcastIE(InfoExtractor):
IE_DESC = 'Smotri.com broadcasts' IE_DESC = 'Smotri.com broadcasts'
IE_NAME = 'smotri:broadcast' IE_NAME = 'smotri:broadcast'
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*' _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
broadcast_id = mobj.group('broadcastid') broadcast_id = mobj.group('id')
broadcast_url = 'http://' + mobj.group('url') broadcast_url = 'http://' + mobj.group('url')
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
@ -328,7 +327,8 @@ class SmotriBroadcastIE(InfoExtractor):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None: if username is None:
self.raise_login_required('Erotic broadcasts allowed only for registered users') self.raise_login_required(
'Erotic broadcasts allowed only for registered users')
login_form = { login_form = {
'login-hint53': '1', 'login-hint53': '1',
@ -343,8 +343,9 @@ class SmotriBroadcastIE(InfoExtractor):
broadcast_page = self._download_webpage( broadcast_page = self._download_webpage(
request, broadcast_id, 'Logging in and confirming age') request, broadcast_id, 'Logging in and confirming age')
if re.search('>Неверный логин или пароль<', broadcast_page) is not None: if '>Неверный логин или пароль<' in broadcast_page:
raise ExtractorError('Unable to log in: bad username or password', expected=True) raise ExtractorError(
'Unable to log in: bad username or password', expected=True)
adult_content = True adult_content = True
else: else:
@ -383,11 +384,11 @@ class SmotriBroadcastIE(InfoExtractor):
broadcast_playpath = broadcast_json['_streamName'] broadcast_playpath = broadcast_json['_streamName']
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']) broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
broadcast_thumbnail = broadcast_json['_imgURL'] broadcast_thumbnail = broadcast_json.get('_imgURL')
broadcast_title = self._live_title(broadcast_json['title']) broadcast_title = self._live_title(broadcast_json['title'])
broadcast_description = broadcast_json['description'] broadcast_description = broadcast_json.get('description')
broadcaster_nick = broadcast_json['nick'] broadcaster_nick = broadcast_json.get('nick')
broadcaster_login = broadcast_json['login'] broadcaster_login = broadcast_json.get('login')
rtmp_conn = 'S:%s' % uuid.uuid4().hex rtmp_conn = 'S:%s' % uuid.uuid4().hex
except KeyError: except KeyError:
if protected_broadcast: if protected_broadcast:

View File

@ -119,6 +119,12 @@ class SoundcloudIE(InfoExtractor):
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
def _extract_urls(webpage):
return [m.group('url') for m in re.finditer(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
webpage)]
def report_resolve(self, video_id): def report_resolve(self, video_id):
"""Report information extraction.""" """Report information extraction."""
self.to_screen('%s: Resolving id' % video_id) self.to_screen('%s: Resolving id' % video_id)

View File

@ -17,6 +17,8 @@ class SouthParkIE(MTVServicesInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'South Park|Bat Daded', 'title': 'South Park|Bat Daded',
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
'timestamp': 1112760000,
'upload_date': '20050406',
}, },
}] }]
@ -28,6 +30,10 @@ class SouthParkEsIE(SouthParkIE):
_TESTS = [{ _TESTS = [{
'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
'info_dict': {
'title': 'Cartman Consigue Una Sonda Anal',
'description': 'Cartman Consigue Una Sonda Anal',
},
'playlist_count': 4, 'playlist_count': 4,
}] }]
@ -42,17 +48,27 @@ class SouthParkDeIE(SouthParkIE):
'info_dict': { 'info_dict': {
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2', 'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Government Won\'t Respect My Privacy', 'title': 'South Park|The Government Won\'t Respect My Privacy',
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
'timestamp': 1380160800,
'upload_date': '20130926',
}, },
}, { }, {
# non-ASCII characters in initial URL # non-ASCII characters in initial URL
'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', 'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
'playlist_count': 4, 'info_dict': {
'title': 'Hashtag „Aufwärmen“',
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
},
'playlist_count': 3,
}, { }, {
# non-ASCII characters in redirect URL # non-ASCII characters in redirect URL
'url': 'http://www.southpark.de/alle-episoden/s18e09', 'url': 'http://www.southpark.de/alle-episoden/s18e09',
'playlist_count': 4, 'info_dict': {
'title': 'Hashtag „Aufwärmen“',
'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.',
},
'playlist_count': 3,
}] }]
@ -63,7 +79,11 @@ class SouthParkNlIE(SouthParkIE):
_TESTS = [{ _TESTS = [{
'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free',
'playlist_count': 4, 'info_dict': {
'title': 'Freemium Isn\'t Free',
'description': 'Stan is addicted to the new Terrance and Phillip mobile game.',
},
'playlist_mincount': 3,
}] }]
@ -74,5 +94,9 @@ class SouthParkDkIE(SouthParkIE):
_TESTS = [{ _TESTS = [{
'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop',
'playlist_count': 4, 'info_dict': {
'title': 'Grounded Vindaloop',
'description': 'Butters is convinced he\'s living in a virtual reality.',
},
'playlist_mincount': 3,
}] }]

View File

@ -11,8 +11,10 @@ class SpikeIE(MTVServicesInfoExtractor):
'info_dict': { 'info_dict': {
'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba', 'id': 'b9c8221a-4e50-479a-b86d-3333323e38ba',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?', 'title': 'Auction Hunters|December 27, 2013|4|414|Can Allen Ride A Hundred Year-Old Motorcycle?',
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb', 'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
'timestamp': 1388120400,
'upload_date': '20131227',
}, },
}, { }, {
'url': 'http://www.spike.com/video-clips/lhtu8m/', 'url': 'http://www.spike.com/video-clips/lhtu8m/',

View File

@ -47,11 +47,10 @@ class TelegraafIE(InfoExtractor):
ext = determine_ext(manifest_url) ext = determine_ext(manifest_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, ext='mp4', m3u8_id='hls')) manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
elif ext == 'mpd': elif ext == 'mpd':
# TODO: Current DASH formats are broken - $Time$ pattern in formats.extend(self._extract_mpd_formats(
# <SegmentTemplate> not implemented yet manifest_url, video_id, mpd_id='dash', fatal=False))
continue
else: else:
self.report_warning('Unknown adaptive format %s' % ext) self.report_warning('Unknown adaptive format %s' % ext)
for location in locations.get('progressive', []): for location in locations.get('progressive', []):

View File

@ -9,56 +9,23 @@ class TVLandIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://www.tvland.com/feeds/mrss/' _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
_TESTS = [{ _TESTS = [{
# Geo-restricted. Without a proxy metadata are still there. With a
# proxy it redirects to http://m.tvland.com/app/
'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
'playlist': [ 'info_dict': {
{ 'description': 'md5:80973e81b916a324e05c14a3fb506d29',
'md5': '227e9723b9669c05bf51098b10287aa7', 'title': 'The Invasion',
'info_dict': { },
'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd', 'playlist': [],
'ext': 'mp4',
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5',
}
},
{
'md5': '9fa2b764ec0e8194fb3ebb01a83df88b',
'info_dict': {
'id': 'f4279548-6e13-40dd-92e8-860d27289197',
'ext': 'mp4',
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5',
}
},
{
'md5': 'fde4c3bccd7cc7e3576b338734153cec',
'info_dict': {
'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334',
'ext': 'mp4',
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5',
}
},
{
'md5': '247f6780cda6891f2e49b8ae2b10e017',
'info_dict': {
'id': '9146ecf5-b15a-4d78-879c-6679b77f4960',
'ext': 'mp4',
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5',
}
},
{
'md5': 'fd269f33256e47bad5eb6c40de089ff6',
'info_dict': {
'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7',
'ext': 'mp4',
'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5',
}
}
],
}, { }, {
'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
'md5': 'e2c6389401cf485df26c79c247b08713', 'md5': 'e2c6389401cf485df26c79c247b08713',
'info_dict': { 'info_dict': {
'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Younger|Younger: Hilary Duff - Little Lies', 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies',
'description': 'md5:7d192f56ca8d958645c83f0de8ef0269' 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269',
'upload_date': '20151228',
'timestamp': 1451289600,
}, },
}] }]

View File

@ -89,8 +89,8 @@ class TVPIE(InfoExtractor):
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)', r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
video_url, 'video base url', default=None) video_url, 'video base url', default=None)
if video_url_base: if video_url_base:
# TODO: Current DASH formats are broken - $Time$ pattern in # TODO: <Group> found instead of <AdaptationSet> in MPD manifest.
# <SegmentTemplate> not implemented yet # It's not mentioned in MPEG-DASH standard. Figure that out.
# formats.extend(self._extract_mpd_formats( # formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd', # video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False)) # video_id, mpd_id='dash', fatal=False))

View File

@ -461,7 +461,7 @@ class TwitchClipsIE(InfoExtractor):
IE_NAME = 'twitch:clips' IE_NAME = 'twitch:clips'
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', 'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound',
'md5': '761769e1eafce0ffebfb4089cb3847cd', 'md5': '761769e1eafce0ffebfb4089cb3847cd',
'info_dict': { 'info_dict': {
@ -473,7 +473,11 @@ class TwitchClipsIE(InfoExtractor):
'uploader': 'stereotype_', 'uploader': 'stereotype_',
'uploader_id': 'stereotype_', 'uploader_id': 'stereotype_',
}, },
} }, {
# multiple formats
'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -485,15 +489,27 @@ class TwitchClipsIE(InfoExtractor):
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'), r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
video_id, transform_source=js_to_json) video_id, transform_source=js_to_json)
video_url = clip['clip_video_url'] title = clip.get('channel_title') or self._og_search_title(webpage)
title = clip['channel_title']
formats = [{
'url': option['source'],
'format_id': option.get('quality'),
'height': int_or_none(option.get('quality')),
} for option in clip.get('quality_options', []) if option.get('source')]
if not formats:
formats = [{
'url': clip['clip_video_url'],
}]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'url': video_url,
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'), 'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'),
'uploader': clip.get('curator_login'), 'uploader': clip.get('curator_login'),
'uploader_id': clip.get('curator_display_name'), 'uploader_id': clip.get('curator_display_name'),
'formats': formats,
} }

View File

@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors""" """Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
_NETRC_MACHINE = 'youtube' _NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided # If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False _LOGIN_REQUIRED = False
@ -116,12 +117,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'hl': 'en_US', 'hl': 'en_US',
} }
login_data = urlencode_postdata(login_form_strs)
req = sanitized_Request(self._LOGIN_URL, login_data)
login_results = self._download_webpage( login_results = self._download_webpage(
req, None, self._PASSWORD_CHALLENGE_URL, None,
note='Logging in', errnote='unable to log in', fatal=False) note='Logging in', errnote='unable to log in', fatal=False,
data=urlencode_postdata(login_form_strs))
if login_results is False: if login_results is False:
return False return False

View File

@ -2123,6 +2123,7 @@ def mimetype2ext(mt):
'dash+xml': 'mpd', 'dash+xml': 'mpd',
'f4m': 'f4m', 'f4m': 'f4m',
'f4m+xml': 'f4m', 'f4m+xml': 'f4m',
'hds+xml': 'f4m',
'vnd.ms-sstr+xml': 'ism', 'vnd.ms-sstr+xml': 'ism',
}.get(res, res) }.get(res, res)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.07.17' __version__ = '2016.07.28'