]+class="embedDetails">([0-9:]+)', webpage,
- 'duration', fatal=False))
-
- info_dict.update({
- 'title': title,
- 'description': description,
- 'duration': duration,
- })
-
- return info_dict
+ return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py
index 0d1ab07f8..4819fe5b4 100644
--- a/youtube_dl/extractor/snotr.py
+++ b/youtube_dl/extractor/snotr.py
@@ -5,9 +5,9 @@ import re
from .common import InfoExtractor
from ..utils import (
- float_or_none,
- str_to_int,
parse_duration,
+ parse_filesize,
+ str_to_int,
)
@@ -17,21 +17,24 @@ class SnotrIE(InfoExtractor):
'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
'info_dict': {
'id': '13708',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Drone flying through fireworks!',
- 'duration': 247,
- 'filesize_approx': 98566144,
+ 'duration': 248,
+ 'filesize_approx': 40700000,
'description': 'A drone flying through Fourth of July Fireworks',
- }
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'expected_warnings': ['description'],
}, {
'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
'info_dict': {
'id': '530',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'David Letteman - George W. Bush Top 10',
'duration': 126,
- 'filesize_approx': 8912896,
+ 'filesize_approx': 8500000,
'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
+ 'thumbnail': 're:^https?://.*\.jpg$',
}
}]
@@ -43,26 +46,28 @@ class SnotrIE(InfoExtractor):
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
- video_url = 'http://cdn.videos.snotr.com/%s.flv' % video_id
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]
view_count = str_to_int(self._html_search_regex(
- r'
\nViews:\n([\d,\.]+)
',
+ r'
]*>\s*]*>Views:\s*]*>([\d,\.]+)',
webpage, 'view count', fatal=False))
duration = parse_duration(self._html_search_regex(
- r'\nLength:\n\s*([0-9:]+).*?
',
+ r']*>\s*]*>Length:\s*]*>([\d:]+)',
webpage, 'duration', fatal=False))
- filesize_approx = float_or_none(self._html_search_regex(
- r'\nFilesize:\n\s*([0-9.]+)\s*megabyte
',
- webpage, 'filesize', fatal=False), invscale=1024 * 1024)
+ filesize_approx = parse_filesize(self._html_search_regex(
+ r']*>\s*]*>Filesize:\s*]*>([^<]+)',
+ webpage, 'filesize', fatal=False))
- return {
+ info_dict.update({
'id': video_id,
'description': description,
'title': title,
- 'url': video_url,
'view_count': view_count,
'duration': duration,
'filesize_approx': filesize_approx,
- }
+ })
+
+ return info_dict
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index aeae931a2..9635c2b49 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?P[\w\d-]+)/
- (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P[\w\d-]+)/?
(?P[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P\d+)
@@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE):
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 50433d0f6..186d22b7d 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -14,7 +14,7 @@ class SpankBangIE(InfoExtractor):
'id': '3vvn',
'ext': 'mp4',
'title': 'fantasy solo',
- 'description': 'dillion harper masturbates on a bed',
+ 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'silly2587',
'age_limit': 18,
@@ -44,12 +44,10 @@ class SpankBangIE(InfoExtractor):
title = self._html_search_regex(
r'(?s)]*>(.+?)
', webpage, 'title')
- description = self._search_regex(
- r'class="desc"[^>]*>([^<]+)',
- webpage, 'description', default=None)
+ description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._search_regex(
- r'class="user"[^>]*>([^<]+)',
+ r'class="user"[^>]*>
]+>([^<]+)',
webpage, 'uploader', fatal=False)
age_limit = self._rta_search(webpage)
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index e527aa971..ef9be7926 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -12,25 +12,29 @@ from ..utils import (
class SunPornoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P\d+)'
+ _TESTS = [{
'url': 'http://www.sunporno.com/videos/807778/',
- 'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
+ 'md5': '507887e29033502f29dba69affeebfc9',
'info_dict': {
'id': '807778',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'md5:0a400058e8105d39e35c35e7c5184164',
'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 302,
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://embeds.sunporno.com/embed/807778',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ 'http://www.sunporno.com/videos/%s' % video_id, video_id)
title = self._html_search_regex(
r'([^<]+)', webpage, 'title')
@@ -40,7 +44,8 @@ class SunPornoIE(InfoExtractor):
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._search_regex(
- r'itemprop="duration">\s*(\d+:\d+)\s*<',
+ (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
+ r'>Duration:\s*]+>\s*(\d+:\d+)\s*<'),
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
@@ -48,7 +53,7 @@ class SunPornoIE(InfoExtractor):
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex(
r'(\d+) Comments?',
- webpage, 'comment count', fatal=False))
+ webpage, 'comment count', fatal=False, default=None))
formats = []
quality = qualities(['mp4', 'flv'])
diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py
index 53723b66e..ab8bab5cd 100644
--- a/youtube_dl/extractor/syfy.py
+++ b/youtube_dl/extractor/syfy.py
@@ -1,13 +1,13 @@
from __future__ import unicode_literals
-from .theplatform import ThePlatformIE
+from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
smuggle_url,
)
-class SyfyIE(ThePlatformIE):
+class SyfyIE(AdobePassIE):
_VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)'
_TESTS = [{
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
@@ -31,7 +31,7 @@ class SyfyIE(ThePlatformIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
syfy_mpx = list(self._parse_json(self._search_regex(
- r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'),
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
display_id)['syfy']['syfy_mpx'].values())[0]
video_id = syfy_mpx['mpxGUID']
title = syfy_mpx['episodeTitle']
@@ -40,7 +40,9 @@ class SyfyIE(ThePlatformIE):
'manifest': 'm3u',
}
if syfy_mpx.get('entitlement') == 'auth':
- resource = 'syfy- %s%s
' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
+ resource = self._get_mvpd_resource(
+ 'syfy', title, video_id,
+ syfy_mpx.get('mpxRating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'syfy', resource)
diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py
deleted file mode 100644
index ed560bd24..000000000
--- a/youtube_dl/extractor/tapely.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- ExtractorError,
- float_or_none,
- parse_iso8601,
- sanitized_Request,
-)
-
-
-class TapelyIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P[A-Za-z0-9\-_]+)(?:/(?P\d+))?'
- _API_URL = 'http://tape.ly/showtape?id={0:}'
- _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
- _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
- _TESTS = [
- {
- 'url': 'http://tape.ly/my-grief-as-told-by-water',
- 'info_dict': {
- 'id': 23952,
- 'title': 'my grief as told by water',
- 'thumbnail': 're:^https?://.*\.png$',
- 'uploader_id': 16484,
- 'timestamp': 1411848286,
- 'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
- },
- 'playlist_count': 13,
- },
- {
- 'url': 'http://tape.ly/my-grief-as-told-by-water/1',
- 'md5': '79031f459fdec6530663b854cbc5715c',
- 'info_dict': {
- 'id': 258464,
- 'title': 'Dreaming Awake (My Brightest Diamond)',
- 'ext': 'm4a',
- },
- },
- {
- 'url': 'https://tapely.com/my-grief-as-told-by-water',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id')
-
- playlist_url = self._API_URL.format(display_id)
- request = sanitized_Request(playlist_url)
- request.add_header('X-Requested-With', 'XMLHttpRequest')
- request.add_header('Accept', 'application/json')
- request.add_header('Referer', url)
-
- playlist = self._download_json(request, display_id)
-
- tape = playlist['tape']
-
- entries = []
- for s in tape['songs']:
- song = s['song']
- entry = {
- 'id': song['id'],
- 'duration': float_or_none(song.get('songduration'), 1000),
- 'title': song['title'],
- }
- if song['source'] == 'S3':
- entry.update({
- 'url': self._S3_SONG_URL.format(song['filename']),
- })
- entries.append(entry)
- elif song['source'] == 'YT':
- self.to_screen('YouTube video detected')
- yt_id = song['filename'].replace('/youtube/', '')
- entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
- entries.append(entry)
- elif song['source'] == 'SC':
- self.to_screen('SoundCloud song detected')
- sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename'])
- entry.update(self.url_result(sc_url, 'Soundcloud'))
- entries.append(entry)
- else:
- self.report_warning('Unknown song source: %s' % song['source'])
-
- if mobj.group('songnr'):
- songnr = int(mobj.group('songnr')) - 1
- try:
- return entries[songnr]
- except IndexError:
- raise ExtractorError(
- 'No song with index: %s' % mobj.group('songnr'),
- expected=True)
-
- return {
- '_type': 'playlist',
- 'id': tape['id'],
- 'display_id': display_id,
- 'title': tape['name'],
- 'entries': entries,
- 'thumbnail': tape.get('image_url'),
- 'description': clean_html(tape.get('subtext')),
- 'like_count': tape.get('likescount'),
- 'uploader_id': tape.get('user_id'),
- 'timestamp': parse_iso8601(tape.get('published_at')),
- }
diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py
new file mode 100644
index 000000000..79b00e376
--- /dev/null
+++ b/youtube_dl/extractor/tbs.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ extract_attributes,
+ ExtractorError,
+)
+
+
+class TBSIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?Ptbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P[^/?#]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
+ 'md5': '9e61d680e2285066ade7199e6408b2ee',
+ 'info_dict': {
+ 'id': '2007318',
+ 'ext': 'mp4',
+ 'title': 'Theatrical Trailer',
+ 'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
+ }
+ }, {
+ 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
+ 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
+ 'info_dict': {
+ 'id': '1538823',
+ 'ext': 'mp4',
+ 'title': 'You Better Run',
+ 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
+ }
+ }]
+
+ def _real_extract(self, url):
+ domain, display_id = re.match(self._VALID_URL, url).groups()
+ site = domain[:3]
+ webpage = self._download_webpage(url, display_id)
+ video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params'))
+ if video_params.get('isAuthRequired') == 'true':
+ raise ExtractorError(
+ 'This video is only available via cable service provider subscription that'
+ ' is not currently supported.', expected=True)
+ query = None
+ clip_id = video_params.get('clipid')
+ if clip_id:
+ query = 'id=' + clip_id
+ else:
+ query = 'titleId=' + video_params['titleid']
+ return self._extract_cvp_info(
+ 'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, {
+ 'default': {
+ 'media_src': 'http://ht.cdn.turner.com/%s/big' % site,
+ },
+ 'secure': {
+ 'media_src': 'http://apple-secure.cdn.turner.com/%s/big' % site,
+ 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain,
+ },
+ })
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index bb3efc4ea..23067e8c6 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -6,10 +6,10 @@ import time
import hmac
import binascii
import hashlib
-import netrc
from .once import OnceIE
+from .adobepass import AdobePassIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@@ -25,9 +25,6 @@ from ..utils import (
xpath_with_ns,
mimetype2ext,
find_xpath_attr,
- unescapeHTML,
- urlencode_postdata,
- unified_timestamp,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -76,10 +73,10 @@ class ThePlatformBaseIE(OnceIE):
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
- subtitles[lang] = [{
+ subtitles.setdefault(lang, []).append({
'ext': mimetype2ext(mime),
'url': src,
- }]
+ })
return {
'title': info['title'],
@@ -96,7 +93,7 @@ class ThePlatformBaseIE(OnceIE):
return self._parse_theplatform_metadata(info)
-class ThePlatformIE(ThePlatformBaseIE):
+class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/
(?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
@@ -167,7 +164,6 @@ class ThePlatformIE(ThePlatformBaseIE):
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
- _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
@classmethod
def _extract_urls(cls, webpage):
@@ -202,96 +198,6 @@ class ThePlatformIE(ThePlatformBaseIE):
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
- def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
- def xml_text(xml_str, tag):
- return self._search_regex(
- '<%s>(.+?)%s>' % (tag, tag), xml_str, tag)
-
- mvpd_headers = {
- 'ap_42': 'anonymous',
- 'ap_11': 'Linux i686',
- 'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
- 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
- }
-
- guid = xml_text(resource, 'guid')
- requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
- authn_token = requestor_info.get('authn_token')
- if authn_token:
- token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
- if token_expires and token_expires >= time.time():
- authn_token = None
- if not authn_token:
- # TODO add support for other TV Providers
- mso_id = 'DTV'
- login_info = netrc.netrc().authenticators(mso_id)
- if not login_info:
- return None
-
- def post_form(form_page, note, data={}):
- post_url = self._html_search_regex(r'