Merge remote-tracking branch 'origin/master' into paj/sbs-news-without-id

This commit is contained in:
Pete Johns 2019-05-27 11:15:06 +10:00
commit e355d11e13
No known key found for this signature in database
GPG Key ID: 06099EDB683EDD63
17 changed files with 99 additions and 397 deletions

View File

@ -9,6 +9,7 @@ python:
- "3.6" - "3.6"
- "pypy" - "pypy"
- "pypy3" - "pypy3"
dist: trusty
env: env:
- YTDL_TEST_SET=core - YTDL_TEST_SET=core
- YTDL_TEST_SET=download - YTDL_TEST_SET=download

View File

@ -73,6 +73,7 @@ from youtube_dl.utils import (
smuggle_url, smuggle_url,
str_to_int, str_to_int,
strip_jsonp, strip_jsonp,
strip_or_none,
timeconvert, timeconvert,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -752,6 +753,18 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'}) self.assertEqual(d, {'status': 'success'})
def test_strip_or_none(self):
self.assertEqual(strip_or_none(' abc'), 'abc')
self.assertEqual(strip_or_none('abc '), 'abc')
self.assertEqual(strip_or_none(' abc '), 'abc')
self.assertEqual(strip_or_none('\tabc\t'), 'abc')
self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
self.assertEqual(strip_or_none('abc'), 'abc')
self.assertEqual(strip_or_none(''), '')
self.assertEqual(strip_or_none(None), None)
self.assertEqual(strip_or_none(42), None)
self.assertEqual(strip_or_none([]), None)
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

View File

@ -65,8 +65,9 @@ class BitChuteIE(InfoExtractor):
webpage, default=None) or self._html_search_meta( webpage, default=None) or self._html_search_meta(
'twitter:image:src', webpage, 'thumbnail') 'twitter:image:src', webpage, 'thumbnail')
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage, (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
'uploader', fatal=False) r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
webpage, 'uploader', fatal=False)
return { return {
'id': video_id, 'id': video_id,

View File

@ -67,6 +67,7 @@ from ..utils import (
sanitized_Request, sanitized_Request,
sanitize_filename, sanitize_filename,
str_or_none, str_or_none,
strip_or_none,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
@ -2480,7 +2481,7 @@ class InfoExtractor(object):
'subtitles': {}, 'subtitles': {},
} }
media_attributes = extract_attributes(media_tag) media_attributes = extract_attributes(media_tag)
src = media_attributes.get('src') src = strip_or_none(media_attributes.get('src'))
if src: if src:
_, formats = _media_formats(src, media_type) _, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats) media_info['formats'].extend(formats)
@ -2490,7 +2491,7 @@ class InfoExtractor(object):
s_attr = extract_attributes(source_tag) s_attr = extract_attributes(source_tag)
# data-video-src and data-src are non standard but seen # data-video-src and data-src are non standard but seen
# several times in the wild # several times in the wild
src = dict_get(s_attr, ('src', 'data-video-src', 'data-src')) src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
if not src: if not src:
continue continue
f = parse_content_type(s_attr.get('type')) f = parse_content_type(s_attr.get('type'))
@ -2533,7 +2534,7 @@ class InfoExtractor(object):
track_attributes = extract_attributes(track_tag) track_attributes = extract_attributes(track_tag)
kind = track_attributes.get('kind') kind = track_attributes.get('kind')
if not kind or kind in ('subtitles', 'captions'): if not kind or kind in ('subtitles', 'captions'):
src = track_attributes.get('src') src = strip_or_none(track_attributes.get('src'))
if not src: if not src:
continue continue
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label') lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')

View File

@ -1,39 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class CriterionIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
_TEST = {
'url': 'http://www.criterion.com/films/184-le-samourai',
'md5': 'bc51beba55685509883a9a7830919ec3',
'info_dict': {
'id': '184',
'ext': 'mp4',
'title': 'Le Samouraï',
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
'thumbnail': r're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
final_url = self._search_regex(
r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._og_search_title(webpage)
description = self._html_search_meta('description', webpage)
thumbnail = self._search_regex(
r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url')
return {
'id': video_id,
'url': final_url,
'title': title,
'description': description,
'thumbnail': thumbnail,
}

View File

@ -240,7 +240,6 @@ from .condenast import CondeNastIE
from .corus import CorusIE from .corus import CorusIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .crackle import CrackleIE from .crackle import CrackleIE
from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import ( from .crunchyroll import (
CrunchyrollIE, CrunchyrollIE,
@ -772,13 +771,6 @@ from .nova import (
NovaEmbedIE, NovaEmbedIE,
NovaIE, NovaIE,
) )
from .novamov import (
AuroraVidIE,
CloudTimeIE,
NowVideoIE,
VideoWeedIE,
WholeCloudIE,
)
from .nowness import ( from .nowness import (
NownessIE, NownessIE,
NownessPlaylistIE, NownessPlaylistIE,
@ -896,7 +888,6 @@ from .polskieradio import (
from .popcorntv import PopcornTVIE from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE from .porn91 import Porn91IE
from .porncom import PornComIE from .porncom import PornComIE
from .pornflip import PornFlipIE
from .pornhd import PornHdIE from .pornhd import PornHdIE
from .pornhub import ( from .pornhub import (
PornHubIE, PornHubIE,

View File

@ -2583,19 +2583,6 @@ class GenericIE(InfoExtractor):
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora') return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded NovaMov-based player
mobj = re.search(
r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
(?P<url>http://(?:(?:embed|www)\.)?
(?:novamov\.com|
nowvideo\.(?:ch|sx|eu|at|ag|co)|
videoweed\.(?:es|com)|
movshare\.(?:net|sx|ag)|
divxstage\.(?:eu|net|ch|co|at|ag))
/embed\.php.+?)\1''', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player # Look for embedded Facebook player
facebook_urls = FacebookIE._extract_urls(webpage) facebook_urls = FacebookIE._extract_urls(webpage)
if facebook_urls: if facebook_urls:

View File

@ -1,212 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
NO_DEFAULT,
sanitized_Request,
urlencode_postdata,
)
class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov'
IE_DESC = 'NovaMov'
_VALID_URL_TEMPLATE = r'''(?x)
http://
(?:
(?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
(?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
)
(?P<id>[a-z\d]{13})
'''
_VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'}
_HOST = 'www.novamov.com'
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
_FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
_TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
_DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
_URL_TEMPLATE = 'http://%s/video/%s'
_TEST = None
def _check_existence(self, webpage, video_id):
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
url = self._URL_TEMPLATE % (self._HOST, video_id)
webpage = self._download_webpage(
url, video_id, 'Downloading video page')
self._check_existence(webpage, video_id)
def extract_filekey(default=NO_DEFAULT):
filekey = self._search_regex(
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
return self._search_regex(
r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
else:
return filekey
filekey = extract_filekey(default=None)
if not filekey:
fields = self._hidden_inputs(webpage)
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
'post url', default=url, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(url, post_url)
request = sanitized_Request(
post_url, urlencode_postdata(fields))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('Referer', post_url)
webpage = self._download_webpage(
request, video_id, 'Downloading continue to the video page')
self._check_existence(webpage, video_id)
filekey = extract_filekey()
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
api_response = self._download_webpage(
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
'Downloading video api response')
response = compat_urlparse.parse_qs(api_response)
if 'error_msg' in response:
raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
video_url = response['url'][0]
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description
}
class WholeCloudIE(NovaMovIE):
IE_NAME = 'wholecloud'
IE_DESC = 'WholeCloud'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
_HOST = 'www.wholecloud.net'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
_TEST = {
'url': 'http://www.wholecloud.net/video/559e28be54d96',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': '559e28be54d96',
'ext': 'flv',
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}
class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
_HOST = 'www.nowvideo.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h4>([^<]+)</h4>'
_DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
_TEST = {
'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
'md5': '12c82cad4f2084881d8bc60ee29df092',
'info_dict': {
'id': 'f1d6fce9a968b',
'ext': 'flv',
'title': 'youtubedl test video BaWjenozKc',
'description': 'Description',
},
}
class VideoWeedIE(NovaMovIE):
IE_NAME = 'videoweed'
IE_DESC = 'VideoWeed'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
_HOST = 'www.videoweed.es'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
_URL_TEMPLATE = 'http://%s/file/%s'
_TEST = {
'url': 'http://www.videoweed.es/file/b42178afbea14',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': 'b42178afbea14',
'ext': 'flv',
'title': 'optical illusion dissapeared image magic illusion',
'description': ''
},
}
class CloudTimeIE(NovaMovIE):
IE_NAME = 'cloudtime'
IE_DESC = 'CloudTime'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}
_HOST = 'www.cloudtime.to'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
_TEST = None
class AuroraVidIE(NovaMovIE):
IE_NAME = 'auroravid'
IE_DESC = 'AuroraVid'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'}
_HOST = 'www.auroravid.to'
_FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
_TESTS = [{
'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
'md5': '7205f346a52bbeba427603ba10d4b935',
'info_dict': {
'id': '4rurhn9x446jj',
'ext': 'flv',
'title': 'search engine optimization',
'description': 'search engine optimization is used to rank the web page in the google search engine'
},
'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
}, {
'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
'only_matching': True,
}]

View File

@ -244,7 +244,7 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor): class OpenloadIE(InfoExtractor):
_DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)' _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services)|oladblock\.(?:services|xyz|me)|openloed\.co)'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?P<host> (?P<host>
@ -357,6 +357,9 @@ class OpenloadIE(InfoExtractor):
}, { }, {
'url': 'https://oload.services/embed/bs1NWj1dCag/', 'url': 'https://oload.services/embed/bs1NWj1dCag/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://oload.press/embed/drTBl1aOTvk/',
'only_matching': True,
}, { }, {
'url': 'https://oladblock.services/f/b8NWEgkqNLI/', 'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
'only_matching': True, 'only_matching': True,

View File

@ -1,101 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
)
from ..utils import (
int_or_none,
try_get,
unified_timestamp,
)
class PornFlipIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
'info_dict': {
'id': 'wz7DfNhMmep',
'ext': 'mp4',
'title': '2 Amateurs swallow make his dream cumshots true',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 112,
'timestamp': 1481655502,
'upload_date': '20161213',
'uploader_id': '106786',
'uploader': 'figifoto',
'view_count': int,
'age_limit': 18,
}
}, {
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.pornflip.com/v/%s' % video_id, video_id)
flashvars = compat_parse_qs(self._search_regex(
r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
webpage, 'flashvars', group='flashvars'))
title = flashvars['video_vars[title]'][0]
def flashvar(kind):
return try_get(
flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
formats = []
for key, value in flashvars.items():
if not (value and isinstance(value, list)):
continue
format_url = value[0]
if key == 'video_vars[hds_manifest]':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
continue
height = self._search_regex(
r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
if not height:
continue
formats.append({
'url': format_url,
'format_id': 'http-%s' % height,
'height': int_or_none(height),
})
self._sort_formats(formats)
uploader = self._html_search_regex(
(r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
webpage, 'uploader', fatal=False, group='uploader')
return {
'id': video_id,
'formats': formats,
'title': title,
'thumbnail': flashvar('big_thumb'),
'duration': int_or_none(flashvar('duration')),
'timestamp': unified_timestamp(self._html_search_meta(
'uploadDate', webpage, 'timestamp')),
'uploader_id': flashvar('author_id'),
'uploader': uploader,
'view_count': int_or_none(flashvar('views')),
'age_limit': 18,
}

View File

@ -170,7 +170,7 @@ class PornHubIE(PornHubBaseIE):
def dl_webpage(platform): def dl_webpage(platform):
self._set_cookie(host, 'platform', platform) self._set_cookie(host, 'platform', platform)
return self._download_webpage( return self._download_webpage(
'http://www.%s/view_video.php?viewkey=%s' % (host, video_id), 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
video_id, 'Downloading %s webpage' % platform) video_id, 'Downloading %s webpage' % platform)
webpage = dl_webpage('pc') webpage = dl_webpage('pc')

View File

@ -3,8 +3,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
KNOWN_EXTENSIONS,
parse_filesize,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -22,10 +25,8 @@ class SharedBaseIE(InfoExtractor):
video_url = self._extract_video_url(webpage, video_id, url) video_url = self._extract_video_url(webpage, video_id, url)
title = compat_b64decode(self._html_search_meta( title = self._extract_title(webpage)
'full:title', webpage, 'title')).decode('utf-8') filesize = int_or_none(self._extract_filesize(webpage))
filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False))
return { return {
'id': video_id, 'id': video_id,
@ -35,6 +36,14 @@ class SharedBaseIE(InfoExtractor):
'title': title, 'title': title,
} }
def _extract_title(self, webpage):
return compat_b64decode(self._html_search_meta(
'full:title', webpage, 'title')).decode('utf-8')
def _extract_filesize(self, webpage):
return self._html_search_meta(
'full:size', webpage, 'file size', fatal=False)
class SharedIE(SharedBaseIE): class SharedIE(SharedBaseIE):
IE_DESC = 'shared.sx' IE_DESC = 'shared.sx'
@ -82,11 +91,27 @@ class VivoIE(SharedBaseIE):
'id': 'd7ddda0e78', 'id': 'd7ddda0e78',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Chicken', 'title': 'Chicken',
'filesize': 528031, 'filesize': 515659,
}, },
} }
def _extract_video_url(self, webpage, video_id, *args): def _extract_title(self, webpage):
title = self._html_search_regex(
r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
'title', default=None, group='title')
if title:
ext = determine_ext(title)
if ext.lower() in KNOWN_EXTENSIONS:
title = title.rpartition('.' + ext)[0]
return title
return self._og_search_title(webpage)
def _extract_filesize(self, webpage):
return parse_filesize(self._search_regex(
r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
webpage, 'filesize', fatal=False))
def _extract_video_url(self, webpage, video_id, url):
def decode_url(encoded_url): def decode_url(encoded_url):
return compat_b64decode(encoded_url).decode('utf-8') return compat_b64decode(encoded_url).decode('utf-8')

View File

@ -106,7 +106,16 @@ class SRGSSRIE(InfoExtractor):
class SRGSSRPlayIE(InfoExtractor): class SRGSSRPlayIE(InfoExtractor):
IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites' IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
_VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)' _VALID_URL = r'''(?x)
https?://
(?:(?:www|play)\.)?
(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
(?:
[^/]+/(?P<type>video|audio)/[^?]+|
popup(?P<type_2>video|audio)player
)
\?id=(?P<id>[0-9a-f\-]{36}|\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
@ -163,9 +172,15 @@ class SRGSSRPlayIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} }
}, {
'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
bu, media_type, media_id = re.match(self._VALID_URL, url).groups() mobj = re.match(self._VALID_URL, url)
bu = mobj.group('bu')
media_type = mobj.group('type') or mobj.group('type_2')
media_id = mobj.group('id')
# other info can be extracted from url + '&layout=json' # other info can be extracted from url + '&layout=json'
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR') return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')

View File

@ -45,7 +45,7 @@ class StreamcloudIE(InfoExtractor):
value="([^"]*)" value="([^"]*)"
''', orig_webpage) ''', orig_webpage)
self._sleep(12, video_id) self._sleep(6, video_id)
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, data=urlencode_postdata(fields), headers={ url, video_id, data=urlencode_postdata(fields), headers={

View File

@ -14,7 +14,18 @@ from ..utils import (
class TwentyFourVideoIE(InfoExtractor): class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video' IE_NAME = '24video'
_VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?P<host>
(?:(?:www|porno)\.)?24video\.
(?:net|me|xxx|sexy?|tube|adult|site)
)/
(?:
video/(?:(?:view|xml)/)?|
player/new24_play\.swf\?id=
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.24video.net/video/view/1044982', 'url': 'http://www.24video.net/video/view/1044982',
@ -42,6 +53,12 @@ class TwentyFourVideoIE(InfoExtractor):
}, { }, {
'url': 'http://www.24video.tube/video/view/2363750', 'url': 'http://www.24video.tube/video/view/2363750',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.24video.site/video/view/2640421',
'only_matching': True,
}, {
'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1789,9 +1789,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError( raise ExtractorError(
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
if video_info.get('license_info'):
raise ExtractorError('This video is DRM protected.', expected=True)
video_details = try_get( video_details = try_get(
player_response, lambda x: x['videoDetails'], dict) or {} player_response, lambda x: x['videoDetails'], dict) or {}
@ -1927,7 +1924,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
formats = [] formats = []
for url_data_str in encoded_url_map.split(','): for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str) url_data = compat_parse_qs(url_data_str)
if 'itag' not in url_data or 'url' not in url_data: if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
continue continue
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
# Unsupported FORMAT_STREAM_TYPE_OTF # Unsupported FORMAT_STREAM_TYPE_OTF
@ -2323,6 +2320,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason', '"token" parameter not in video info for unknown reason',
video_id=video_id) video_id=video_id)
if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats) self._sort_formats(formats)
self.mark_watched(video_id, video_info, player_response) self.mark_watched(video_id, video_info, player_response)

View File

@ -1951,8 +1951,8 @@ def bool_or_none(v, default=None):
return v if isinstance(v, bool) else default return v if isinstance(v, bool) else default
def strip_or_none(v): def strip_or_none(v, default=None):
return None if v is None else v.strip() return v.strip() if isinstance(v, compat_str) else default
def url_or_none(url): def url_or_none(url):