Merge branch 'master' into XVideos-issue-15799
This commit is contained in:
commit
e2845da44e
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.14**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.20**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.03.14
|
[debug] youtube-dl version 2018.03.20
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
22
ChangeLog
22
ChangeLog
@ -1,3 +1,25 @@
|
|||||||
|
version 2018.03.20
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Improve thumbnail extraction for HTML5 entries
|
||||||
|
* Generalize XML manifest processing code and improve XSPF parsing
|
||||||
|
+ [extractor/common] Add _download_xml_handle
|
||||||
|
+ [extractor/common] Add support for relative URIs in _parse_xspf (#15794)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [7plus] Extract series metadata (#15862, #15906)
|
||||||
|
* [9now] Bypass geo restriction (#15920)
|
||||||
|
* [cbs] Skip unavailable assets (#13490, #13506, #15776)
|
||||||
|
+ [canalc2] Add support for HTML5 videos (#15916, #15919)
|
||||||
|
+ [ceskatelevize] Add support for iframe embeds (#15918)
|
||||||
|
+ [prosiebensat1] Add support for galileo.tv (#15894)
|
||||||
|
+ [generic] Add support for xfileshare embeds (#15879)
|
||||||
|
* [bilibili] Switch to v2 playurl API
|
||||||
|
* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863)
|
||||||
|
* [heise] Improve extraction (#15496, #15784, #15026)
|
||||||
|
* [instagram] Fix user videos extraction (#15858)
|
||||||
|
|
||||||
|
|
||||||
version 2018.03.14
|
version 2018.03.14
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -694,6 +694,55 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
self.ie._sort_formats(formats)
|
self.ie._sort_formats(formats)
|
||||||
expect_value(self, formats, expected_formats, None)
|
expect_value(self, formats, expected_formats, None)
|
||||||
|
|
||||||
|
def test_parse_xspf(self):
|
||||||
|
_TEST_CASES = [
|
||||||
|
(
|
||||||
|
'foo_xspf',
|
||||||
|
'https://example.org/src/foo_xspf.xspf',
|
||||||
|
[{
|
||||||
|
'id': 'foo_xspf',
|
||||||
|
'title': 'Pandemonium',
|
||||||
|
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||||
|
'duration': 202.416,
|
||||||
|
'formats': [{
|
||||||
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
|
'url': 'https://example.org/src/cd1/track%201.mp3',
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'id': 'foo_xspf',
|
||||||
|
'title': 'Final Cartridge (Nichico Twelve Remix)',
|
||||||
|
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||||
|
'duration': 255.857,
|
||||||
|
'formats': [{
|
||||||
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
|
'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'id': 'foo_xspf',
|
||||||
|
'title': 'Rebuilding Nightingale',
|
||||||
|
'description': 'Visit http://bigbrother404.bandcamp.com',
|
||||||
|
'duration': 287.915,
|
||||||
|
'formats': [{
|
||||||
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
|
'url': 'https://example.org/src/track3.mp3',
|
||||||
|
}, {
|
||||||
|
'manifest_url': 'https://example.org/src/foo_xspf.xspf',
|
||||||
|
'url': 'https://example.com/track3.mp3',
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
||||||
|
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
||||||
|
mode='r', encoding='utf-8') as f:
|
||||||
|
entries = self.ie._parse_xspf(
|
||||||
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
|
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
|
||||||
|
expect_value(self, entries, expected_entries, None)
|
||||||
|
for i in range(len(entries)):
|
||||||
|
expect_dict(self, entries[i], expected_entries[i])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
34
test/testdata/xspf/foo_xspf.xspf
vendored
Normal file
34
test/testdata/xspf/foo_xspf.xspf
vendored
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<playlist version="1" xmlns="http://xspf.org/ns/0/">
|
||||||
|
<date>2018-03-09T18:01:43Z</date>
|
||||||
|
<trackList>
|
||||||
|
<track>
|
||||||
|
<location>cd1/track%201.mp3</location>
|
||||||
|
<title>Pandemonium</title>
|
||||||
|
<creator>Foilverb</creator>
|
||||||
|
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||||
|
<album>Pandemonium EP</album>
|
||||||
|
<trackNum>1</trackNum>
|
||||||
|
<duration>202416</duration>
|
||||||
|
</track>
|
||||||
|
<track>
|
||||||
|
<location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location>
|
||||||
|
<title>Final Cartridge (Nichico Twelve Remix)</title>
|
||||||
|
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||||
|
<creator>Foilverb</creator>
|
||||||
|
<album>Pandemonium EP</album>
|
||||||
|
<trackNum>2</trackNum>
|
||||||
|
<duration>255857</duration>
|
||||||
|
</track>
|
||||||
|
<track>
|
||||||
|
<location>track3.mp3</location>
|
||||||
|
<location>https://example.com/track3.mp3</location>
|
||||||
|
<title>Rebuilding Nightingale</title>
|
||||||
|
<annotation>Visit http://bigbrother404.bandcamp.com</annotation>
|
||||||
|
<creator>Foilverb</creator>
|
||||||
|
<album>Pandemonium EP</album>
|
||||||
|
<trackNum>3</trackNum>
|
||||||
|
<duration>287915</duration>
|
||||||
|
</track>
|
||||||
|
</trackList>
|
||||||
|
</playlist>
|
@ -31,6 +31,10 @@ class Canalc2IE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.canalc2.tv/video/%s' % video_id, video_id)
|
'http://www.canalc2.tv/video/%s' % video_id, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
|
||||||
|
webpage, 'title')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
|
for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
|
||||||
if video_url.startswith('rtmp://'):
|
if video_url.startswith('rtmp://'):
|
||||||
@ -49,17 +53,21 @@ class Canalc2IE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': 'http',
|
'format_id': 'http',
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
if formats:
|
||||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
|
info = {
|
||||||
duration = parse_duration(self._search_regex(
|
'formats': formats,
|
||||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
}
|
||||||
webpage, 'duration', fatal=False))
|
else:
|
||||||
|
info = self._parse_html5_media_entries(url, webpage, url)[0]
|
||||||
|
|
||||||
return {
|
self._sort_formats(info['formats'])
|
||||||
|
|
||||||
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'duration': duration,
|
'duration': parse_duration(self._search_regex(
|
||||||
'formats': formats,
|
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||||
}
|
webpage, 'duration', fatal=False)),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .theplatform import ThePlatformFeedIE
|
from .theplatform import ThePlatformFeedIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
@ -61,6 +62,7 @@ class CBSIE(CBSBaseIE):
|
|||||||
asset_types = []
|
asset_types = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
formats = []
|
formats = []
|
||||||
|
last_e = None
|
||||||
for item in items_data.findall('.//item'):
|
for item in items_data.findall('.//item'):
|
||||||
asset_type = xpath_text(item, 'assetType')
|
asset_type = xpath_text(item, 'assetType')
|
||||||
if not asset_type or asset_type in asset_types:
|
if not asset_type or asset_type in asset_types:
|
||||||
@ -74,11 +76,17 @@ class CBSIE(CBSBaseIE):
|
|||||||
query['formats'] = 'MPEG4,M3U'
|
query['formats'] = 'MPEG4,M3U'
|
||||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||||
query['formats'] = 'MPEG4,FLV'
|
query['formats'] = 'MPEG4,FLV'
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
try:
|
||||||
update_url_query(tp_release_url, query), content_id,
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||||
'Downloading %s SMIL data' % asset_type)
|
update_url_query(tp_release_url, query), content_id,
|
||||||
|
'Downloading %s SMIL data' % asset_type)
|
||||||
|
except ExtractorError as e:
|
||||||
|
last_e = e
|
||||||
|
continue
|
||||||
formats.extend(tp_formats)
|
formats.extend(tp_formats)
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
|
if last_e and not formats:
|
||||||
|
raise last_e
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
USER_AGENTS,
|
USER_AGENTS,
|
||||||
)
|
)
|
||||||
@ -265,6 +266,10 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# iframe embed
|
||||||
|
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -272,8 +277,11 @@ class CeskaTelevizePoradyIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data_url = unescapeHTML(self._search_regex(
|
data_url = update_url_query(unescapeHTML(self._search_regex(
|
||||||
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
webpage, 'iframe player url', group='url'))
|
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
|
||||||
|
webpage, 'iframe player url', group='url')), query={
|
||||||
|
'autoStart': 'true',
|
||||||
|
})
|
||||||
|
|
||||||
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||||
|
@ -644,19 +644,31 @@ class InfoExtractor(object):
|
|||||||
content, _ = res
|
content, _ = res
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
def _download_xml_handle(
|
||||||
|
self, url_or_request, video_id, note='Downloading XML',
|
||||||
|
errnote='Unable to download XML', transform_source=None,
|
||||||
|
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||||
|
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
||||||
|
res = self._download_webpage_handle(
|
||||||
|
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||||
|
encoding=encoding, data=data, headers=headers, query=query)
|
||||||
|
if res is False:
|
||||||
|
return res
|
||||||
|
xml_string, urlh = res
|
||||||
|
return self._parse_xml(
|
||||||
|
xml_string, video_id, transform_source=transform_source,
|
||||||
|
fatal=fatal), urlh
|
||||||
|
|
||||||
def _download_xml(self, url_or_request, video_id,
|
def _download_xml(self, url_or_request, video_id,
|
||||||
note='Downloading XML', errnote='Unable to download XML',
|
note='Downloading XML', errnote='Unable to download XML',
|
||||||
transform_source=None, fatal=True, encoding=None,
|
transform_source=None, fatal=True, encoding=None,
|
||||||
data=None, headers={}, query={}):
|
data=None, headers={}, query={}):
|
||||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
xml_string = self._download_webpage(
|
res = self._download_xml_handle(
|
||||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
url_or_request, video_id, note=note, errnote=errnote,
|
||||||
encoding=encoding, data=data, headers=headers, query=query)
|
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||||
if xml_string is False:
|
data=data, headers=headers, query=query)
|
||||||
return xml_string
|
return res if res is False else res[0]
|
||||||
return self._parse_xml(
|
|
||||||
xml_string, video_id, transform_source=transform_source,
|
|
||||||
fatal=fatal)
|
|
||||||
|
|
||||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||||
if transform_source:
|
if transform_source:
|
||||||
@ -1694,22 +1706,24 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
|
||||||
xspf = self._download_xml(
|
xspf = self._download_xml(
|
||||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
xspf_url, playlist_id, 'Downloading xpsf playlist',
|
||||||
'Unable to download xspf manifest', fatal=fatal)
|
'Unable to download xspf manifest', fatal=fatal)
|
||||||
if xspf is False:
|
if xspf is False:
|
||||||
return []
|
return []
|
||||||
return self._parse_xspf(xspf, playlist_id)
|
return self._parse_xspf(
|
||||||
|
xspf, playlist_id, xspf_url=xspf_url,
|
||||||
|
xspf_base_url=base_url(xspf_url))
|
||||||
|
|
||||||
def _parse_xspf(self, playlist, playlist_id):
|
def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
|
||||||
NS_MAP = {
|
NS_MAP = {
|
||||||
'xspf': 'http://xspf.org/ns/0/',
|
'xspf': 'http://xspf.org/ns/0/',
|
||||||
's1': 'http://static.streamone.nl/player/ns/0',
|
's1': 'http://static.streamone.nl/player/ns/0',
|
||||||
}
|
}
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||||
title = xpath_text(
|
title = xpath_text(
|
||||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||||
description = xpath_text(
|
description = xpath_text(
|
||||||
@ -1719,12 +1733,18 @@ class InfoExtractor(object):
|
|||||||
duration = float_or_none(
|
duration = float_or_none(
|
||||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'url': location.text,
|
for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
|
||||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
format_url = urljoin(xspf_base_url, location.text)
|
||||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
if not format_url:
|
||||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
continue
|
||||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'manifest_url': xspf_url,
|
||||||
|
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||||
|
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||||
|
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
@ -1738,18 +1758,18 @@ class InfoExtractor(object):
|
|||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_xml_handle(
|
||||||
mpd_url, video_id,
|
mpd_url, video_id,
|
||||||
note=note or 'Downloading MPD manifest',
|
note=note or 'Downloading MPD manifest',
|
||||||
errnote=errnote or 'Failed to download MPD manifest',
|
errnote=errnote or 'Failed to download MPD manifest',
|
||||||
fatal=fatal)
|
fatal=fatal)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
mpd, urlh = res
|
mpd_doc, urlh = res
|
||||||
mpd_base_url = base_url(urlh.geturl())
|
mpd_base_url = base_url(urlh.geturl())
|
||||||
|
|
||||||
return self._parse_mpd_formats(
|
return self._parse_mpd_formats(
|
||||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||||
@ -2023,17 +2043,16 @@ class InfoExtractor(object):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||||
res = self._download_webpage_handle(
|
res = self._download_xml_handle(
|
||||||
ism_url, video_id,
|
ism_url, video_id,
|
||||||
note=note or 'Downloading ISM manifest',
|
note=note or 'Downloading ISM manifest',
|
||||||
errnote=errnote or 'Failed to download ISM manifest',
|
errnote=errnote or 'Failed to download ISM manifest',
|
||||||
fatal=fatal)
|
fatal=fatal)
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return []
|
||||||
ism, urlh = res
|
ism_doc, urlh = res
|
||||||
|
|
||||||
return self._parse_ism_formats(
|
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
|
||||||
|
|
||||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||||
"""
|
"""
|
||||||
@ -2131,8 +2150,8 @@ class InfoExtractor(object):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
|
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
|
||||||
def absolute_url(video_url):
|
def absolute_url(item_url):
|
||||||
return compat_urlparse.urljoin(base_url, video_url)
|
return urljoin(base_url, item_url)
|
||||||
|
|
||||||
def parse_content_type(content_type):
|
def parse_content_type(content_type):
|
||||||
if not content_type:
|
if not content_type:
|
||||||
@ -2189,7 +2208,7 @@ class InfoExtractor(object):
|
|||||||
if src:
|
if src:
|
||||||
_, formats = _media_formats(src, media_type)
|
_, formats = _media_formats(src, media_type)
|
||||||
media_info['formats'].extend(formats)
|
media_info['formats'].extend(formats)
|
||||||
media_info['thumbnail'] = media_attributes.get('poster')
|
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||||
if media_content:
|
if media_content:
|
||||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||||
source_attributes = extract_attributes(source_tag)
|
source_attributes = extract_attributes(source_tag)
|
||||||
|
@ -104,6 +104,7 @@ from .mediasite import MediasiteIE
|
|||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .yapfiles import YapFilesIE
|
from .yapfiles import YapFilesIE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
|
from .xfileshare import XFileShareIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -2231,7 +2232,11 @@ class GenericIE(InfoExtractor):
|
|||||||
self._sort_formats(smil['formats'])
|
self._sort_formats(smil['formats'])
|
||||||
return smil
|
return smil
|
||||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
return self.playlist_result(
|
||||||
|
self._parse_xspf(
|
||||||
|
doc, video_id, xspf_url=url,
|
||||||
|
xspf_base_url=compat_str(full_response.geturl())),
|
||||||
|
video_id)
|
||||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
info_dict['formats'] = self._parse_mpd_formats(
|
info_dict['formats'] = self._parse_mpd_formats(
|
||||||
doc,
|
doc,
|
||||||
@ -2971,6 +2976,11 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
|
vice_urls, video_id, video_title, ie=ViceIE.ie_key())
|
||||||
|
|
||||||
|
xfileshare_urls = XFileShareIE._extract_urls(webpage)
|
||||||
|
if xfileshare_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
|
||||||
|
|
||||||
def merge_dicts(dict1, dict2):
|
def merge_dicts(dict1, dict2):
|
||||||
merged = {}
|
merged = {}
|
||||||
for k, v in dict1.items():
|
for k, v in dict1.items():
|
||||||
|
@ -4,15 +4,17 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
ExtractorError,
|
smuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NineNowIE(InfoExtractor):
|
class NineNowIE(InfoExtractor):
|
||||||
IE_NAME = '9now.com.au'
|
IE_NAME = '9now.com.au'
|
||||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
||||||
|
_GEO_COUNTRIES = ['AU']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# clip
|
# clip
|
||||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
||||||
@ -75,7 +77,9 @@ class NineNowIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
'url': smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||||
|
{'geo_countries': self._GEO_COUNTRIES}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': common_data.get('description'),
|
'description': common_data.get('description'),
|
||||||
|
@ -133,7 +133,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
|||||||
(?:
|
(?:
|
||||||
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
|
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
|
||||||
)\.(?:de|at|ch)|
|
)\.(?:de|at|ch)|
|
||||||
ran\.de|fem\.com|advopedia\.de
|
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
|
||||||
)
|
)
|
||||||
/(?P<id>.+)
|
/(?P<id>.+)
|
||||||
'''
|
'''
|
||||||
@ -326,6 +326,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
|||||||
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
|
'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# geo restricted to Germany
|
||||||
|
'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
|
'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -343,7 +348,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
|||||||
r'"clip_id"\s*:\s+"(\d+)"',
|
r'"clip_id"\s*:\s+"(\d+)"',
|
||||||
r'clipid: "(\d+)"',
|
r'clipid: "(\d+)"',
|
||||||
r'clip[iI]d=(\d+)',
|
r'clip[iI]d=(\d+)',
|
||||||
r'clip[iI]d\s*=\s*["\'](\d+)',
|
r'clip[iI][dD]\s*=\s*["\'](\d+)',
|
||||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||||
r'proMamsId"\s*:\s*"(\d+)',
|
r'proMamsId"\s*:\s*"(\d+)',
|
||||||
r'proMamsId"\s*:\s*"(\d+)',
|
r'proMamsId"\s*:\s*"(\d+)',
|
||||||
|
@ -4,22 +4,30 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .brightcove import BrightcoveNewIE
|
from .brightcove import BrightcoveNewIE
|
||||||
from ..utils import update_url_query
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
try_get,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SevenPlusIE(BrightcoveNewIE):
|
class SevenPlusIE(BrightcoveNewIE):
|
||||||
IE_NAME = '7plus'
|
IE_NAME = '7plus'
|
||||||
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
|
_VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
|
'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BEAT-001',
|
'id': 'MTYS7-003',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
|
'title': 'S7 E3 - Wind Surf',
|
||||||
'description': 'md5:37718bea20a8eedaca7f7361af566131',
|
'description': 'md5:29c6a69f21accda7601278f81b46483d',
|
||||||
'uploader_id': '5303576322001',
|
'uploader_id': '5303576322001',
|
||||||
'upload_date': '20171031',
|
'upload_date': '20171201',
|
||||||
'timestamp': 1509440068,
|
'timestamp': 1512106377,
|
||||||
|
'series': 'Mighty Ships',
|
||||||
|
'season_number': 7,
|
||||||
|
'episode_number': 3,
|
||||||
|
'episode': 'Wind Surf',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
@ -63,5 +71,14 @@ class SevenPlusIE(BrightcoveNewIE):
|
|||||||
value = item.get(src_key)
|
value = item.get(src_key)
|
||||||
if value:
|
if value:
|
||||||
info[dst_key] = value
|
info[dst_key] = value
|
||||||
|
info['series'] = try_get(
|
||||||
|
item, lambda x: x['seriesLogo']['name'], compat_str)
|
||||||
|
mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
|
||||||
|
if mobj:
|
||||||
|
info.update({
|
||||||
|
'season_number': int(mobj.group(1)),
|
||||||
|
'episode_number': int(mobj.group(2)),
|
||||||
|
'episode': mobj.group(3),
|
||||||
|
})
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
@ -118,6 +118,15 @@ class XFileShareIE(InfoExtractor):
|
|||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [
|
||||||
|
mobj.group('url')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
|
||||||
|
% '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
|
||||||
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2018.03.14'
|
__version__ = '2018.03.20'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user