Merge branch 'master' into XVideos-issue-15799

This commit is contained in:
Parmjit Virk 2018-03-19 17:38:16 -05:00
commit e2845da44e
14 changed files with 254 additions and 61 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.03.14 [debug] youtube-dl version 2018.03.20
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,25 @@
version 2018.03.20
Core
* [extractor/common] Improve thumbnail extraction for HTML5 entries
* Generalize XML manifest processing code and improve XSPF parsing
+ [extractor/common] Add _download_xml_handle
+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
Extractors
+ [7plus] Extract series metadata (#15862, #15906)
* [9now] Bypass geo restriction (#15920)
* [cbs] Skip unavailable assets (#13490, #13506, #15776)
+ [canalc2] Add support for HTML5 videos (#15916, #15919)
+ [ceskatelevize] Add support for iframe embeds (#15918)
+ [prosiebensat1] Add support for galileo.tv (#15894)
+ [generic] Add support for xfileshare embeds (#15879)
* [bilibili] Switch to v2 playurl API
* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
* [heise] Improve extraction (#15496, #15784, #15026)
* [instagram] Fix user videos extraction (#15858)
version 2018.03.14 version 2018.03.14
Extractors Extractors

View File

@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._sort_formats(formats) self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None) expect_value(self, formats, expected_formats, None)
def test_parse_xspf(self):
    """Parse each XSPF fixture and compare the entries with the expected ones.

    The fixture URL is passed both as the manifest URL and as the base URL,
    so the expected format URLs below are the fixture's track locations
    resolved against that URL.
    """
    manifest_url = 'https://example.org/src/foo_xspf.xspf'
    description = 'Visit http://bigbrother404.bandcamp.com'

    def track(title, duration, *urls):
        # Every expected track shares the id, description and manifest URL;
        # only the title, duration and format URLs differ.
        return {
            'id': 'foo_xspf',
            'title': title,
            'description': description,
            'duration': duration,
            'formats': [
                {'manifest_url': manifest_url, 'url': url} for url in urls],
        }

    test_cases = [
        (
            'foo_xspf',
            manifest_url,
            [
                track(
                    'Pandemonium', 202.416,
                    'https://example.org/src/cd1/track%201.mp3'),
                track(
                    'Final Cartridge (Nichico Twelve Remix)', 255.857,
                    'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'),
                track(
                    'Rebuilding Nightingale', 287.915,
                    'https://example.org/src/track3.mp3',
                    'https://example.com/track3.mp3'),
            ]
        ),
    ]

    for fixture_name, fixture_url, expected in test_cases:
        fixture_path = './test/testdata/xspf/%s.xspf' % fixture_name
        with io.open(fixture_path, mode='r', encoding='utf-8') as fixture:
            xml_bytes = fixture.read().encode('utf-8')
        entries = self.ie._parse_xspf(
            compat_etree_fromstring(xml_bytes),
            fixture_name, xspf_url=fixture_url, xspf_base_url=fixture_url)
        expect_value(self, entries, expected, None)
        # Index into the expected list (rather than zip) so that a surplus
        # parsed entry still fails loudly.
        for idx, entry in enumerate(entries):
            expect_dict(self, entry, expected[idx])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

34
test/testdata/xspf/foo_xspf.xspf vendored Normal file
View File

@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<playlist version="1" xmlns="http://xspf.org/ns/0/">
<date>2018-03-09T18:01:43Z</date>
<trackList>
<track>
<location>cd1/track%201.mp3</location>
<title>Pandemonium</title>
<creator>Foilverb</creator>
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
<album>Pandemonium EP</album>
<trackNum>1</trackNum>
<duration>202416</duration>
</track>
<track>
<location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
<title>Final Cartridge (Nichico Twelve Remix)</title>
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
<creator>Foilverb</creator>
<album>Pandemonium EP</album>
<trackNum>2</trackNum>
<duration>255857</duration>
</track>
<track>
<location>track3.mp3</location>
<location>https://example.com/track3.mp3</location>
<title>Rebuilding Nightingale</title>
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
<creator>Foilverb</creator>
<album>Pandemonium EP</album>
<trackNum>3</trackNum>
<duration>287915</duration>
</track>
</trackList>
</playlist>

View File

@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.canalc2.tv/video/%s' % video_id, video_id) 'http://www.canalc2.tv/video/%s' % video_id, video_id)
title = self._html_search_regex(
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
webpage, 'title')
formats = [] formats = []
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
if video_url.startswith('rtmp://'): if video_url.startswith('rtmp://'):
@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
'url': video_url, 'url': video_url,
'format_id': 'http', 'format_id': 'http',
}) })
self._sort_formats(formats)
title = self._html_search_regex( if formats:
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title') info = {
duration = parse_duration(self._search_regex(
r'id=["\']video_duree["\'][^>]*>([^<]+)',
webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': title,
'duration': duration,
'formats': formats, 'formats': formats,
} }
else:
info = self._parse_html5_media_entries(url, webpage, url)[0]
self._sort_formats(info['formats'])
info.update({
'id': video_id,
'title': title,
'duration': parse_duration(self._search_regex(
r'id=["\']video_duree["\'][^>]*>([^<]+)',
webpage, 'duration', fatal=False)),
})
return info

View File

@ -2,6 +2,7 @@ from __future__ import unicode_literals
from .theplatform import ThePlatformFeedIE from .theplatform import ThePlatformFeedIE
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
find_xpath_attr, find_xpath_attr,
xpath_element, xpath_element,
@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE):
asset_types = [] asset_types = []
subtitles = {} subtitles = {}
formats = [] formats = []
last_e = None
for item in items_data.findall('.//item'): for item in items_data.findall('.//item'):
asset_type = xpath_text(item, 'assetType') asset_type = xpath_text(item, 'assetType')
if not asset_type or asset_type in asset_types: if not asset_type or asset_type in asset_types:
@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
query['formats'] = 'MPEG4,M3U' query['formats'] = 'MPEG4,M3U'
elif asset_type in ('RTMP', 'WIFI', '3G'): elif asset_type in ('RTMP', 'WIFI', '3G'):
query['formats'] = 'MPEG4,FLV' query['formats'] = 'MPEG4,FLV'
try:
tp_formats, tp_subtitles = self._extract_theplatform_smil( tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query(tp_release_url, query), content_id, update_url_query(tp_release_url, query), content_id,
'Downloading %s SMIL data' % asset_type) 'Downloading %s SMIL data' % asset_type)
except ExtractorError as e:
last_e = e
continue
formats.extend(tp_formats) formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles) subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats:
raise last_e
self._sort_formats(formats) self._sort_formats(formats)
info = self._extract_theplatform_metadata(tp_path, content_id) info = self._extract_theplatform_metadata(tp_path, content_id)

View File

@ -13,6 +13,7 @@ from ..utils import (
float_or_none, float_or_none,
sanitized_Request, sanitized_Request,
unescapeHTML, unescapeHTML,
update_url_query,
urlencode_postdata, urlencode_postdata,
USER_AGENTS, USER_AGENTS,
) )
@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
# iframe embed
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data_url = unescapeHTML(self._search_regex( data_url = update_url_query(unescapeHTML(self._search_regex(
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1', (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'iframe player url', group='url')) r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
webpage, 'iframe player url', group='url')), query={
'autoStart': 'true',
})
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())

View File

@ -644,19 +644,31 @@ class InfoExtractor(object):
content, _ = res content, _ = res
return content return content
def _download_xml_handle(
self, url_or_request, video_id, note='Downloading XML',
errnote='Unable to download XML', transform_source=None,
fatal=True, encoding=None, data=None, headers={}, query={}):
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
res = self._download_webpage_handle(
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)
if res is False:
return res
xml_string, urlh = res
return self._parse_xml(
xml_string, video_id, transform_source=transform_source,
fatal=fatal), urlh
def _download_xml(self, url_or_request, video_id, def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML', note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None, transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}): data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element""" """Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage( res = self._download_xml_handle(
url_or_request, video_id, note, errnote, fatal=fatal, url_or_request, video_id, note=note, errnote=errnote,
encoding=encoding, data=data, headers=headers, query=query) transform_source=transform_source, fatal=fatal, encoding=encoding,
if xml_string is False: data=data, headers=headers, query=query)
return xml_string return res if res is False else res[0]
return self._parse_xml(
xml_string, video_id, transform_source=transform_source,
fatal=fatal)
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source: if transform_source:
@ -1694,22 +1706,24 @@ class InfoExtractor(object):
}) })
return subtitles return subtitles
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
xspf = self._download_xml( xspf = self._download_xml(
playlist_url, playlist_id, 'Downloading xpsf playlist', xspf_url, playlist_id, 'Downloading xpsf playlist',
'Unable to download xspf manifest', fatal=fatal) 'Unable to download xspf manifest', fatal=fatal)
if xspf is False: if xspf is False:
return [] return []
return self._parse_xspf(xspf, playlist_id) return self._parse_xspf(
xspf, playlist_id, xspf_url=xspf_url,
xspf_base_url=base_url(xspf_url))
def _parse_xspf(self, playlist, playlist_id): def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
NS_MAP = { NS_MAP = {
'xspf': 'http://xspf.org/ns/0/', 'xspf': 'http://xspf.org/ns/0/',
's1': 'http://static.streamone.nl/player/ns/0', 's1': 'http://static.streamone.nl/player/ns/0',
} }
entries = [] entries = []
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
title = xpath_text( title = xpath_text(
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
description = xpath_text( description = xpath_text(
@ -1719,12 +1733,18 @@ class InfoExtractor(object):
duration = float_or_none( duration = float_or_none(
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
formats = [{ formats = []
'url': location.text, for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
format_url = urljoin(xspf_base_url, location.text)
if not format_url:
continue
formats.append({
'url': format_url,
'manifest_url': xspf_url,
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] })
self._sort_formats(formats) self._sort_formats(formats)
entries.append({ entries.append({
@ -1738,18 +1758,18 @@ class InfoExtractor(object):
return entries return entries
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
res = self._download_webpage_handle( res = self._download_xml_handle(
mpd_url, video_id, mpd_url, video_id,
note=note or 'Downloading MPD manifest', note=note or 'Downloading MPD manifest',
errnote=errnote or 'Failed to download MPD manifest', errnote=errnote or 'Failed to download MPD manifest',
fatal=fatal) fatal=fatal)
if res is False: if res is False:
return [] return []
mpd, urlh = res mpd_doc, urlh = res
mpd_base_url = base_url(urlh.geturl()) mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats( return self._parse_mpd_formats(
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
formats_dict=formats_dict, mpd_url=mpd_url) formats_dict=formats_dict, mpd_url=mpd_url)
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
@ -2023,17 +2043,16 @@ class InfoExtractor(object):
return formats return formats
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
res = self._download_webpage_handle( res = self._download_xml_handle(
ism_url, video_id, ism_url, video_id,
note=note or 'Downloading ISM manifest', note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest', errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal) fatal=fatal)
if res is False: if res is False:
return [] return []
ism, urlh = res ism_doc, urlh = res
return self._parse_ism_formats( return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
""" """
@ -2131,8 +2150,8 @@ class InfoExtractor(object):
return formats return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
def absolute_url(video_url): def absolute_url(item_url):
return compat_urlparse.urljoin(base_url, video_url) return urljoin(base_url, item_url)
def parse_content_type(content_type): def parse_content_type(content_type):
if not content_type: if not content_type:
@ -2189,7 +2208,7 @@ class InfoExtractor(object):
if src: if src:
_, formats = _media_formats(src, media_type) _, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats) media_info['formats'].extend(formats)
media_info['thumbnail'] = media_attributes.get('poster') media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content: if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content): for source_tag in re.findall(r'<source[^>]+>', media_content):
source_attributes = extract_attributes(source_tag) source_attributes = extract_attributes(source_tag)

View File

@ -104,6 +104,7 @@ from .mediasite import MediasiteIE
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .yapfiles import YapFilesIE from .yapfiles import YapFilesIE
from .vice import ViceIE from .vice import ViceIE
from .xfileshare import XFileShareIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2231,7 +2232,11 @@ class GenericIE(InfoExtractor):
self._sort_formats(smil['formats']) self._sort_formats(smil['formats'])
return smil return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist': elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id) return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=compat_str(full_response.geturl())),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats( info_dict['formats'] = self._parse_mpd_formats(
doc, doc,
@ -2971,6 +2976,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
vice_urls, video_id, video_title, ie=ViceIE.ie_key()) vice_urls, video_id, video_title, ie=ViceIE.ie_key())
xfileshare_urls = XFileShareIE._extract_urls(webpage)
if xfileshare_urls:
return self.playlist_from_matches(
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
def merge_dicts(dict1, dict2): def merge_dicts(dict1, dict2):
merged = {} merged = {}
for k, v in dict1.items(): for k, v in dict1.items():

View File

@ -4,15 +4,17 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
float_or_none, float_or_none,
ExtractorError, smuggle_url,
) )
class NineNowIE(InfoExtractor): class NineNowIE(InfoExtractor):
IE_NAME = '9now.com.au' IE_NAME = '9now.com.au'
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
_TESTS = [{ _TESTS = [{
# clip # clip
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc', 'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'url': smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
{'geo_countries': self._GEO_COUNTRIES}),
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': common_data.get('description'), 'description': common_data.get('description'),

View File

@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
(?: (?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
)\.(?:de|at|ch)| )\.(?:de|at|ch)|
ran\.de|fem\.com|advopedia\.de ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
) )
/(?P<id>.+) /(?P<id>.+)
''' '''
@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
'only_matching': True, 'only_matching': True,
}, },
{
# geo restricted to Germany
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
'only_matching': True,
},
{ {
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
'only_matching': True, 'only_matching': True,
@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
r'"clip_id"\s*:\s+"(\d+)"', r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"', r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)', r'clip[iI]d=(\d+)',
r'clip[iI]d\s*=\s*["\'](\d+)', r'clip[iI][dD]\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
r'proMamsId&quot;\s*:\s*&quot;(\d+)', r'proMamsId&quot;\s*:\s*&quot;(\d+)',
r'proMamsId"\s*:\s*"(\d+)', r'proMamsId"\s*:\s*"(\d+)',

View File

@ -4,22 +4,30 @@ from __future__ import unicode_literals
import re import re
from .brightcove import BrightcoveNewIE from .brightcove import BrightcoveNewIE
from ..utils import update_url_query from ..compat import compat_str
from ..utils import (
try_get,
update_url_query,
)
class SevenPlusIE(BrightcoveNewIE): class SevenPlusIE(BrightcoveNewIE):
IE_NAME = '7plus' IE_NAME = '7plus'
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))' _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
_TESTS = [{ _TESTS = [{
'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001', 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
'info_dict': { 'info_dict': {
'id': 'BEAT-001', 'id': 'MTYS7-003',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds', 'title': 'S7 E3 - Wind Surf',
'description': 'md5:37718bea20a8eedaca7f7361af566131', 'description': 'md5:29c6a69f21accda7601278f81b46483d',
'uploader_id': '5303576322001', 'uploader_id': '5303576322001',
'upload_date': '20171031', 'upload_date': '20171201',
'timestamp': 1509440068, 'timestamp': 1512106377,
'series': 'Mighty Ships',
'season_number': 7,
'episode_number': 3,
'episode': 'Wind Surf',
}, },
'params': { 'params': {
'format': 'bestvideo', 'format': 'bestvideo',
@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
value = item.get(src_key) value = item.get(src_key)
if value: if value:
info[dst_key] = value info[dst_key] = value
info['series'] = try_get(
item, lambda x: x['seriesLogo']['name'], compat_str)
mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
if mobj:
info.update({
'season_number': int(mobj.group(1)),
'episode_number': int(mobj.group(2)),
'episode': mobj.group(3),
})
return info return info

View File

@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor):
'only_matching': True 'only_matching': True
}] }]
@staticmethod
def _extract_urls(webpage):
    """Return the src URLs of XFileShare embed iframes found in webpage.

    The first element of every XFileShareIE._SITES entry is joined into a
    regex alternation that the iframe host must match (these are presumably
    host patterns; confirm against _SITES).
    """
    site_patterns = list(zip(*XFileShareIE._SITES))[0]
    iframe_re = (
        r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
        % '|'.join(site_patterns))
    return [match.group('url') for match in re.finditer(iframe_re, webpage)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.03.14' __version__ = '2018.03.20'