[openload] add support for verystream.com (website is similar to openload)

[openload] minor verystream bug fixes

[verystream] separate extractor (now, a wrapper around Openload)

[verystream] become flake8 compliant

[generic] fix _extract_urls for Openload and Verystream

[generic] fix _extract_urls for Openload and Verystream

[verystream] change test to only_matching

[verystream] don't require phantomjs

[verystream] flake8 compliance

[verystream] don't use Openload's built-in IDs

[verystream] make requested changes on GitHub

[openload] add support for verystream.com (website is similar to openload)

[openload] minor verystream bug fixes

[verystream] separate extractor (now, a wrapper around Openload)

[verystream] become flake8 compliant

[generic] fix _extract_urls for Openload and Verystream

[generic] fix _extract_urls for Openload and Verystream

[verystream] change test to only_matching

[verystream] don't require phantomjs

[verystream] flake8 compliance

[verystream] don't use Openload's built-in IDs

[verystream] make requested changes on GitHub
This commit is contained in:
Elliot Algase 2019-05-05 16:16:27 -04:00
parent f8c55c6664
commit b55cd2d07e
3 changed files with 80 additions and 20 deletions

View File

@ -833,7 +833,10 @@ from .ooyala import (
OoyalaIE, OoyalaIE,
OoyalaExternalIE, OoyalaExternalIE,
) )
from .openload import OpenloadIE from .openload import (
OpenloadIE,
VerystreamIE,
)
from .ora import OraTVIE from .ora import OraTVIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,

View File

@ -89,7 +89,10 @@ from .piksel import PikselIE
from .videa import VideaIE from .videa import VideaIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .openload import OpenloadIE from .openload import (
OpenloadIE,
VerystreamIE,
)
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE from .limelight import LimelightBaseIE
@ -3017,6 +3020,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
# Look for Verystream embeds
verystream_urls = VerystreamIE._extract_urls(webpage)
if verystream_urls:
return self.playlist_from_matches(
verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
# Look for VideoPress embeds # Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage) videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls: if videopress_urls:

View File

@ -254,7 +254,10 @@ class OpenloadIE(InfoExtractor):
(?:f|embed)/ (?:f|embed)/
(?P<id>[a-zA-Z0-9-_]+) (?P<id>[a-zA-Z0-9-_]+)
''' % _DOMAINS ''' % _DOMAINS
_EMBED_WORD = 'embed'
_STREAM_WORD = 'f'
_REDIR_WORD = 'stream'
_URL_IDS = ('streamurl', 'streamuri', 'streamurj')
_TESTS = [{ _TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o', 'url': 'https://openload.co/f/kUEfGclsU9o',
'md5': 'bf1c059b004ebc7a256f89408e65c36e', 'md5': 'bf1c059b004ebc7a256f89408e65c36e',
@ -1954,6 +1957,11 @@ class OpenloadIE(InfoExtractor):
r'<iframe[^>]+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)' r'<iframe[^>]+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)'
% OpenloadIE._DOMAINS, webpage) % OpenloadIE._DOMAINS, webpage)
def _extract_decrypted_page(self, page_url, webpage, video_id, headers):
    """Return the page HTML after running it through PhantomJS.

    The already-downloaded ``webpage`` for ``page_url`` is handed to a
    ``PhantomJSwrapper`` (version 2.0 is required) together with the
    request ``headers``; the first element of the result is returned.
    Subclasses may override this hook when no such processing is needed.
    """
    rendered_page, _ = PhantomJSwrapper(self, required_version='2.0').get(
        page_url, html=webpage, video_id=video_id, headers=headers)
    return rendered_page
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') host = mobj.group('host')
@ -1964,9 +1972,9 @@ class OpenloadIE(InfoExtractor):
'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS), 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS),
} }
for path in ('embed', 'f'): for path in (self._EMBED_WORD, self._STREAM_WORD):
page_url = url_pattern % path page_url = url_pattern % path
last = path == 'f' last = path == self._STREAM_WORD
webpage = self._download_webpage( webpage = self._download_webpage(
page_url, video_id, 'Downloading %s webpage' % path, page_url, video_id, 'Downloading %s webpage' % path,
headers=headers, fatal=last) headers=headers, fatal=last)
@ -1978,21 +1986,20 @@ class OpenloadIE(InfoExtractor):
raise ExtractorError('File not found', expected=True, video_id=video_id) raise ExtractorError('File not found', expected=True, video_id=video_id)
break break
phantom = PhantomJSwrapper(self, required_version='2.0') webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers)
webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) for element_id in self._URL_IDS:
decoded_id = get_element_by_id(element_id, webpage)
decoded_id = (get_element_by_id('streamurl', webpage) or if decoded_id:
get_element_by_id('streamuri', webpage) or break
get_element_by_id('streamurj', webpage) or if not decoded_id:
self._search_regex( decoded_id = self._search_regex(
(r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
'stream URL')) 'stream URL')
video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)
video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
title = self._og_search_title(webpage, default=None) or self._search_regex( title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@ -2012,3 +2019,44 @@ class OpenloadIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': headers, 'http_headers': headers,
} }
class VerystreamIE(OpenloadIE):
    """Extractor for verystream.com, built as a thin wrapper around OpenloadIE.

    The extraction logic is inherited from ``OpenloadIE``; this class only
    overrides the class attributes that logic reads (URL path words and the
    element ids holding the stream token) and the page-decryption hook.
    """
    IE_NAME = 'verystream'

    # Must be a NON-capturing group: this fragment is interpolated into the
    # ``re.findall`` pattern in ``_extract_urls``, which already has one
    # capturing group around the whole URL. A second capturing group would
    # make ``findall`` return ``(url, domain)`` tuples instead of URL strings.
    _DOMAINS = r'(?:verystream\.com)'
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
                            %s
                        )/
                        (?:stream|e)/
                        (?P<id>[a-zA-Z0-9-_]+)
                    ''' % _DOMAINS
    # Path segment of the embed page.
    _EMBED_WORD = 'e'
    # Path segment of the regular (non-embed) page.
    _STREAM_WORD = 'stream'
    # Path segment used when building the final video URL.
    _REDIR_WORD = 'gettoken'
    # Ids of the HTML elements searched for the decoded stream id.
    _URL_IDS = ('videolink', )
    _TESTS = [{
        'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
        'md5': 'd3e8c5628ccb9970b65fd65269886795',
        'info_dict': {
            'id': 'c1GWQ9ngBBx',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
        'only_matching': True,
    }]

    def _extract_decrypted_page(self, page_url, webpage, video_id, headers):
        """Return ``webpage`` unchanged; no PhantomJS pass is performed."""
        return webpage  # for Verystream, the webpage is already decrypted

    @staticmethod
    def _extract_urls(webpage):
        """Return the list of Verystream embed URLs found in ``webpage``.

        Looks for ``<iframe>`` tags whose ``src`` points at a Verystream
        ``/e/<id>`` page (scheme optional). Each result is a plain string
        because the pattern has exactly one capturing group.
        """
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?://)?%s/e/[a-zA-Z0-9-_]+)'
            % VerystreamIE._DOMAINS, webpage)