Merge remote-tracking branch 'origin/master' into paj/sbs-news-without-id
This commit is contained in:
commit
08a92ee752
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.05.11
|
||||
[debug] youtube-dl version 2019.05.20
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.05.11
|
||||
[debug] youtube-dl version 2019.05.20
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.20**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
19
ChangeLog
19
ChangeLog
@ -1,3 +1,22 @@
|
||||
version 2019.05.20
|
||||
|
||||
Core
|
||||
+ [extractor/common] Move workaround for applying first Set-Cookie header
|
||||
into a separate _apply_first_set_cookie_header method
|
||||
|
||||
Extractors
|
||||
* [safari] Fix authentication (#21090)
|
||||
* [vk] Use _apply_first_set_cookie_header
|
||||
* [vrt] Fix extraction (#20527)
|
||||
+ [canvas] Add support for vrtnieuws and sporza site ids and extract
|
||||
AES HLS formats
|
||||
+ [vrv] Extract captions (#19238)
|
||||
* [tele5] Improve video id extraction
|
||||
* [tele5] Relax URL regular expression (#21020, #21063)
|
||||
* [svtplay] Update API URL (#21075)
|
||||
+ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
|
||||
|
||||
|
||||
version 2019.05.11
|
||||
|
||||
Core
|
||||
|
@ -1071,7 +1071,7 @@
|
||||
- **VoxMediaVolume**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
|
||||
- **VrtNU**: VrtNU.be
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
|
@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
@ -35,6 +35,10 @@ class CanvasIE(InfoExtractor):
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -52,9 +56,9 @@ class CanvasIE(InfoExtractor):
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
||||
m3u8_id=format_type, fatal=False))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
|
@ -2817,6 +2817,33 @@ class InfoExtractor(object):
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
Apply first Set-Cookie header instead of the last. Experimental.
|
||||
|
||||
Some sites (e.g. [1-3]) may serve two cookies under the same name
|
||||
in the Set-Cookie header and expect the first (old) one to be set rather
|
||||
than the second (new) one. However, per RFC 6265 the newer cookie
|
||||
should replace the older one in the cookie store, which is what actually happens.
|
||||
We work around this issue by manually resetting the cookie to
|
||||
the first one.
|
||||
1. https://new.vk.com/
|
||||
2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
|
||||
3. https://learning.oreilly.com/
|
||||
"""
|
||||
for header, cookies in url_handle.headers.items():
|
||||
if header.lower() != 'set-cookie':
|
||||
continue
|
||||
if sys.version_info[0] >= 3:
|
||||
cookies = cookies.encode('iso-8859-1')
|
||||
cookies = cookies.decode('utf-8')
|
||||
cookie_value = re.search(
|
||||
r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
|
||||
if cookie_value:
|
||||
value, domain = cookie_value.groups()
|
||||
self._set_cookie(domain, cookie, value)
|
||||
break
|
||||
|
||||
def get_testcases(self, include_onlymatching=False):
|
||||
t = getattr(self, '_TEST', None)
|
||||
if t:
|
||||
|
@ -1,15 +1,18 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
urlencode_postdata,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -31,44 +34,52 @@ class SafariBaseIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
headers = std_headers.copy()
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = self._LOGIN_URL
|
||||
_, urlh = self._download_webpage_handle(
|
||||
'https://learning.oreilly.com/accounts/login-check/', None,
|
||||
'Downloading login page')
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login form', headers=headers)
|
||||
def is_logged(urlh):
|
||||
return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'href=["\']/accounts/logout/', r'>Sign Out<'))
|
||||
|
||||
if is_logged(login_page):
|
||||
if is_logged(urlh):
|
||||
self.LOGGED_IN = True
|
||||
return
|
||||
|
||||
csrf = self._html_search_regex(
|
||||
r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
|
||||
login_page, 'csrf token')
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
parsed_url = compat_urlparse.urlparse(redirect_url)
|
||||
qs = compat_parse_qs(parsed_url.query)
|
||||
next_uri = compat_urlparse.urljoin(
|
||||
'https://api.oreilly.com', qs['next'][0])
|
||||
|
||||
login_form = {
|
||||
'csrfmiddlewaretoken': csrf,
|
||||
'email': username,
|
||||
'password1': password,
|
||||
'login': 'Sign In',
|
||||
'next': '',
|
||||
}
|
||||
auth, urlh = self._download_json_handle(
|
||||
'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
|
||||
data=json.dumps({
|
||||
'email': username,
|
||||
'password': password,
|
||||
'redirect_uri': next_uri,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': redirect_url,
|
||||
}, expected_status=400)
|
||||
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in')
|
||||
|
||||
if not is_logged(login_page):
|
||||
credentials = auth.get('credentials')
|
||||
if (not auth.get('logged_in') and not auth.get('redirect_uri')
|
||||
and credentials):
|
||||
raise ExtractorError(
|
||||
'Login failed; make sure your credentials are correct and try again.',
|
||||
expected=True)
|
||||
'Unable to login: %s' % credentials, expected=True)
|
||||
|
||||
self.LOGGED_IN = True
|
||||
# oreilly serves two cookies named groot_sessionid in the Set-Cookie header
|
||||
# and expects the first one to be the one actually set
|
||||
self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
|
||||
|
||||
_, urlh = self._download_webpage_handle(
|
||||
auth.get('redirect_uri') or next_uri, None, 'Completing login',)
|
||||
|
||||
if is_logged(urlh):
|
||||
self.LOGGED_IN = True
|
||||
return
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class SafariIE(SafariBaseIE):
|
||||
@ -76,7 +87,7 @@ class SafariIE(SafariBaseIE):
|
||||
IE_DESC = 'safaribooksonline.com online video'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||
(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
|
||||
(?:
|
||||
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
||||
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
||||
@ -107,6 +118,9 @@ class SafariIE(SafariBaseIE):
|
||||
}, {
|
||||
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PARTNER_ID = '1926081'
|
||||
@ -163,7 +177,7 @@ class SafariIE(SafariBaseIE):
|
||||
|
||||
class SafariApiIE(SafariBaseIE):
|
||||
IE_NAME = 'safari:api'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||
@ -188,7 +202,7 @@ class SafariCourseIE(SafariBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||
(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
|
||||
(?:
|
||||
library/view/[^/]+|
|
||||
api/v1/book|
|
||||
@ -219,6 +233,9 @@ class SafariCourseIE(SafariBaseIE):
|
||||
}, {
|
||||
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@ -185,7 +185,7 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
|
||||
def _extract_by_video_id(self, video_id, webpage=None):
|
||||
data = self._download_json(
|
||||
'https://api.svt.se/video/%s' % video_id,
|
||||
'https://api.svt.se/videoplayer-api/video/%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
if not info_dict.get('title'):
|
||||
|
@ -7,7 +7,7 @@ from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class Tele5IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
|
||||
'info_dict': {
|
||||
@ -21,10 +21,22 @@ class Tele5IE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
||||
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/tv/dark-matter/videos',
|
||||
'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/anders-ist-sevda/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -36,8 +48,9 @@ class Tele5IE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(
|
||||
r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
|
||||
webpage, 'video id')
|
||||
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
|
||||
r'\s+id\s*=\s*["\']player_(\d{6,})',
|
||||
r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id')
|
||||
|
||||
return self.url_result(
|
||||
'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
|
||||
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import collections
|
||||
import re
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor):
|
||||
'pass': password.encode('cp1251'),
|
||||
})
|
||||
|
||||
# https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
|
||||
# and expects the first one to be set rather than second (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201).
|
||||
# As of RFC6265 the newer one cookie should be set into cookie store
|
||||
# what actually happens.
|
||||
# We will workaround this VK issue by resetting the remixlhk cookie to
|
||||
# the first one manually.
|
||||
for header, cookies in url_handle.headers.items():
|
||||
if header.lower() != 'set-cookie':
|
||||
continue
|
||||
if sys.version_info[0] >= 3:
|
||||
cookies = cookies.encode('iso-8859-1')
|
||||
cookies = cookies.decode('utf-8')
|
||||
remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
|
||||
if remixlhk:
|
||||
value, domain = remixlhk.groups()
|
||||
self._set_cookie(domain, 'remixlhk', value)
|
||||
break
|
||||
# vk serves two cookies named remixlhk in the Set-Cookie header and expects
|
||||
# the first one to be the one actually set
|
||||
self._apply_first_set_cookie_header(url_handle, 'remixlhk')
|
||||
|
||||
login_page = self._download_webpage(
|
||||
'https://login.vk.com/?act=login', None,
|
||||
|
@ -5,150 +5,83 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class VRTIE(InfoExtractor):
|
||||
IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
|
||||
_VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
|
||||
_TESTS = [
|
||||
# deredactie.be
|
||||
{
|
||||
'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
|
||||
'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
|
||||
'info_dict': {
|
||||
'id': '2129880',
|
||||
'ext': 'flv',
|
||||
'title': 'Het journaal L - 25/10/14',
|
||||
'description': None,
|
||||
'timestamp': 1414271750.949,
|
||||
'upload_date': '20141025',
|
||||
'duration': 929,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
|
||||
'md5': 'e1663accf5cf13f375f3cd0d10476669',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
|
||||
'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
|
||||
'timestamp': 1557924660,
|
||||
'upload_date': '20190515',
|
||||
'duration': 31.2,
|
||||
},
|
||||
# sporza.be
|
||||
{
|
||||
'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
|
||||
'md5': '11f53088da9bf8e7cfc42456697953ff',
|
||||
'info_dict': {
|
||||
'id': '2124639',
|
||||
'ext': 'flv',
|
||||
'title': 'Bekijk Extra Time van 20 oktober',
|
||||
'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
|
||||
'timestamp': 1413835980.560,
|
||||
'upload_date': '20141020',
|
||||
'duration': 3238,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
|
||||
'md5': '910bba927566e9ab992278f647eb4b75',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
|
||||
'ext': 'mp4',
|
||||
'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
|
||||
'timestamp': 1557923760,
|
||||
'upload_date': '20190515',
|
||||
'duration': 115.17,
|
||||
},
|
||||
# cobra.be
|
||||
{
|
||||
'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
|
||||
'md5': '78a2b060a5083c4f055449a72477409d',
|
||||
'info_dict': {
|
||||
'id': '2126050',
|
||||
'ext': 'flv',
|
||||
'title': 'Bret Easton Ellis in Café Corsari',
|
||||
'description': 'md5:f699986e823f32fd6036c1855a724ee9',
|
||||
'timestamp': 1413967500.494,
|
||||
'upload_date': '20141022',
|
||||
'duration': 661,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
{
|
||||
# YouTube video
|
||||
'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
|
||||
'md5': 'b8b93da1df1cea6c8556255a796b7d61',
|
||||
'info_dict': {
|
||||
'id': 'Wji-BZ0oCwg',
|
||||
'ext': 'mp4',
|
||||
'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
|
||||
'description': 'md5:8e468944dce15567a786a67f74262583',
|
||||
'uploader': 'Star Wars',
|
||||
'uploader_id': 'starwars',
|
||||
'upload_date': '20160407',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
{
|
||||
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
|
||||
'info_dict': {
|
||||
'id': '2377055',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cafe Derby',
|
||||
'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
|
||||
'upload_date': '20150626',
|
||||
'timestamp': 1435305240.769,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CLIENT_MAP = {
|
||||
'vrt.be/vrtnws': 'vrtnieuws',
|
||||
'sporza.be': 'sporza',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<[^>]+class="vrtvideo"[^>]*>)', webpage, 'vrt video'))
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
asset_id = attrs['data-videoid']
|
||||
publication_id = attrs.get('data-publicationid')
|
||||
if publication_id:
|
||||
asset_id = publication_id + '$' + asset_id
|
||||
client = attrs.get('data-client') or self._CLIENT_MAP[site]
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
|
||||
|
||||
src = self._search_regex(
|
||||
r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
|
||||
|
||||
video_type = self._search_regex(
|
||||
r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
|
||||
|
||||
if video_type == 'YouTubeVideo':
|
||||
return self.url_result(src, 'Youtube')
|
||||
|
||||
formats = []
|
||||
|
||||
mobj = re.search(
|
||||
r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
|
||||
webpage)
|
||||
if mobj:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
'%s/%s' % (mobj.group('server'), mobj.group('path')),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
|
||||
if src:
|
||||
formats = self._extract_wowza_formats(src, video_id)
|
||||
if 'data-video-geoblocking="true"' not in webpage:
|
||||
for f in formats:
|
||||
if f['url'].startswith('rtsp://'):
|
||||
http_format = f.copy()
|
||||
http_format.update({
|
||||
'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
|
||||
'format_id': f['format_id'].replace('rtsp', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(http_format)
|
||||
|
||||
if not formats and 'data-video-geoblocking="true"' in webpage:
|
||||
self.raise_geo_restricted('This video is only available in Belgium')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
timestamp = float_or_none(self._search_regex(
|
||||
r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
|
||||
duration = float_or_none(self._search_regex(
|
||||
r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)
|
||||
title = strip_or_none(get_element_by_class(
|
||||
'vrt-title', webpage) or self._html_search_meta(
|
||||
['og:title', 'twitter:title', 'name'], webpage))
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'twitter:description', 'description'], webpage)
|
||||
if description == '…':
|
||||
description = None
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'article:published_time', webpage))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'_type': 'url_transparent',
|
||||
'id': asset_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': attrs.get('data-posterimage'),
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(attrs.get('data-duration'), 1000),
|
||||
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id),
|
||||
'ie_key': 'Canvas',
|
||||
}
|
||||
|
@ -198,14 +198,15 @@ class VRVIE(VRVBaseIE):
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in streams_json.get('subtitles', {}).values():
|
||||
subtitle_url = subtitle.get('url')
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': subtitle.get('format', 'ass'),
|
||||
})
|
||||
for k in ('captions', 'subtitles'):
|
||||
for subtitle in streams_json.get(k, {}).values():
|
||||
subtitle_url = subtitle.get('url')
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': subtitle.get('format', 'ass'),
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail in video_data.get('images', {}).get('thumbnails', []):
|
||||
|
@ -511,6 +511,8 @@ class YahooGyaOPlayerIE(InfoExtractor):
|
||||
'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id,
|
||||
video_id, query={
|
||||
'fields': 'longDescription,title,videoId',
|
||||
}, headers={
|
||||
'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web',
|
||||
})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2019.05.11'
|
||||
__version__ = '2019.05.20'
|
||||
|
Loading…
x
Reference in New Issue
Block a user