Merge branch 'master' of git@github.com:rg3/youtube-dl.git into extractor_orfium

This commit is contained in:
Surya Oktafendri 2018-04-03 10:29:06 +07:00
commit 17eab653e1
No known key found for this signature in database
GPG Key ID: 8CAB076E32F1FC8D
15 changed files with 387 additions and 133 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.26.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.26.1**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.03**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.03.26.1
[debug] youtube-dl version 2018.04.03
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -1,3 +1,20 @@
version 2018.04.03
Extractors
+ [tvnow] Add support for shows (#15837)
* [dramafever] Fix authentication (#16067)
* [afreecatv] Use partial view only when necessary (#14450)
+ [afreecatv] Add support for authentication (#14450)
+ [nationalgeographic] Add support for new URL schema (#16001, #16054)
* [xvideos] Fix thumbnail extraction (#15978, #15979)
* [medialaan] Fix vod id (#16038)
+ [openload] Add support for oload.site (#16039)
* [naver] Fix extraction (#16029)
* [dramafever] Partially switch to API v5 (#16026)
* [abc:iview] Unescape title and series meta fields (#15994)
* [videa] Extend URL regular expression (#16003)
version 2018.03.26.1
Core

View File

@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
## Filesystem Options:
-a, --batch-file FILE File containing URLs to download ('-' for
stdin)
stdin), one URL per line. Lines starting
with '#', ';' or ']' are considered as
comments and ignored.
--id Use only video ID in file name
-o, --output TEMPLATE Output filename template, see the "OUTPUT
TEMPLATE" for all the info

View File

@ -887,6 +887,7 @@
- **TVNoe**
- **TVNow**
- **TVNowList**
- **TVNowShow**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**

View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
urlencode_postdata,
xpath_text,
)
@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor):
)
(?P<id>\d+)
'''
_NETRC_MACHINE = 'afreecatv'
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@ -139,22 +141,22 @@ class AfreecaTVIE(InfoExtractor):
'skip_download': True,
},
}, {
# adult video
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
# PARTIAL_ADULT
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
'info_dict': {
'id': '20171001_F1AE1711_196617479_1',
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
'title': '[생]서아 초심 찾기 방송 (part 1)',
'title': '[생]빨개요♥ (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'BJ서아',
'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20171001',
'duration': 3600,
'age_limit': 18,
'upload_date': '20180327',
'duration': 3601,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['adult content'],
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
@ -172,6 +174,51 @@ class AfreecaTVIE(InfoExtractor):
video_key['part'] = int(m.group('part'))
return video_key
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
login_form = {
'szWork': 'login',
'szType': 'json',
'szUid': username,
'szPassword': password,
'isSaveId': 'false',
'szScriptVar': 'oLoginRet',
'szAction': '',
}
response = self._download_json(
'https://login.afreecatv.com/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form))
_ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-6: 'https://login.afreecatv.com/membership/changeMember.php',
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
-12: 'https://member.afreecatv.com/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.',
}
result = int_or_none(response.get('RESULT'))
if result != 1:
error = _ERRORS.get(result, 'You have failed to log in.')
raise ExtractorError(
'Unable to login: %s said: %s' % (self.IE_NAME, error),
expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
@ -187,22 +234,42 @@ class AfreecaTVIE(InfoExtractor):
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
print(video_id, station_id, bbs_id)
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, headers={
'Referer': url,
}, query={
partial_view = False
for _ in range(2):
query = {
'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
'partialView': 'SKIP_ADULT',
})
}
if partial_view:
query['partialView'] = 'SKIP_ADULT'
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, 'Downloading video info XML%s'
% (' (skipping adult)' if partial_view else ''),
video_id, headers={
'Referer': url,
}, query=query)
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag != 'SUCCEED':
if flag and flag == 'SUCCEED':
break
if flag == 'PARTIAL_ADULT':
self._downloader.report_warning(
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
'Only content suitable for all ages will be downloaded. '
'Provide account credentials if you wish to download restricted content.')
partial_view = True
continue
elif flag == 'ADULT':
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
else:
error = flag
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
'%s said: %s' % (self.IE_NAME, error), expected=True)
else:
raise ExtractorError('Unable to download video info')
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:

View File

@ -117,9 +117,9 @@ class BiliBiliIE(InfoExtractor):
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
default=None
) or compat_parse_qs(self._search_regex(
[r'1EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'1EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
r'1<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))['cid'][0]
else:
if 'no_bangumi_tip' not in smuggled_data:

View File

@ -2,26 +2,26 @@
from __future__ import unicode_literals
import itertools
import json
from .amp import AMPIE
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
clean_html,
ExtractorError,
int_or_none,
remove_end,
sanitized_Request,
urlencode_postdata
parse_age_limit,
parse_duration,
unified_timestamp,
)
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
class DramaFeverBaseIE(InfoExtractor):
_NETRC_MACHINE = 'dramafever'
_GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@ -38,8 +38,8 @@ class DramaFeverBaseIE(AMPIE):
'consumer secret', default=self._CONSUMER_SECRET)
def _real_initialize(self):
self._login()
self._consumer_secret = self._get_consumer_secret()
self._login()
def _login(self):
(username, password) = self._get_login_info()
@ -51,18 +51,28 @@ class DramaFeverBaseIE(AMPIE):
'password': password,
}
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
response = self._download_webpage(
request, None, 'Logging in')
try:
response = self._download_json(
'https://www.dramafever.com/api/users/login', None, 'Logging in',
data=json.dumps(login_form).encode('utf-8'), headers={
'x-consumer-key': self._consumer_secret,
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
response = self._parse_json(
e.cause.read().decode('utf-8'), None)
else:
raise
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
# Successful login
if response.get('result') or response.get('guid') or response.get('user_guid'):
return
errors = response.get('errors')
if errors and isinstance(errors, list):
error = errors[0]
message = error.get('message') or error['reason']
raise ExtractorError('Unable to login: %s' % message, expected=True)
raise ExtractorError('Unable to log in')
@ -70,18 +80,20 @@ class DramaFeverIE(DramaFeverBaseIE):
IE_NAME = 'dramafever'
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
_TESTS = [{
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
'info_dict': {
'id': '4512.1',
'ext': 'flv',
'title': 'Cooking with Shin',
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
'id': '4274.1',
'ext': 'wvm',
'title': 'Heirs - Episode 1',
'description': 'md5:362a24ba18209f6276e032a651c50bc2',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 3783,
'timestamp': 1381354993,
'upload_date': '20131009',
'series': 'Heirs',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 344,
},
'params': {
# m3u8 download
@ -110,50 +122,95 @@ class DramaFeverIE(DramaFeverBaseIE):
'only_matching': True,
}]
def _call_api(self, path, video_id, note, fatal=False):
return self._download_json(
'https://www.dramafever.com/api/5/' + path,
video_id, note=note, headers={
'x-consumer-key': self._consumer_secret,
}, fatal=fatal)
def _get_subtitles(self, video_id):
subtitles = {}
subs = self._call_api(
'video/%s/subtitles/webvtt/' % video_id, video_id,
'Downloading subtitles JSON', fatal=False)
if not subs or not isinstance(subs, list):
return subtitles
for sub in subs:
if not isinstance(sub, dict):
continue
sub_url = sub.get('url')
if not sub_url or not isinstance(sub_url, compat_str):
continue
subtitles.setdefault(
sub.get('code') or sub.get('language') or 'en', []).append({
'url': sub_url
})
return subtitles
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
self.raise_geo_restricted(
msg='Currently unavailable in your country',
countries=self._GEO_COUNTRIES)
raise
# title is postfixed with video id for some reason, removing
if info.get('title'):
info['title'] = remove_end(info['title'], video_id).strip()
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
# and dealing with page number as with episode number
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
% (self._consumer_secret, series_id, episode_number),
video_id, 'Downloading episode info JSON', fatal=False)
if episode_info:
value = episode_info.get('value')
if isinstance(value, list):
for v in value:
if v.get('type') == 'Episode':
subfile = v.get('subfile') or v.get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
info.setdefault('subtitles', {}).setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
episode_number = int_or_none(v.get('number'))
episode_fallback = 'Episode'
if episode_number:
episode_fallback += ' %d' % episode_number
info['episode'] = v.get('title') or episode_fallback
info['episode_number'] = episode_number
break
return info
video = self._call_api(
'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
'Downloading video JSON')
formats = []
download_assets = video.get('download_assets')
if download_assets and isinstance(download_assets, dict):
for format_id, format_dict in download_assets.items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url or not isinstance(format_url, compat_str):
continue
formats.append({
'url': format_url,
'format_id': format_id,
'filesize': int_or_none(video.get('filesize')),
})
stream = self._call_api(
'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
fatal=False)
if stream:
stream_url = stream.get('stream_url')
if stream_url:
formats.extend(self._extract_m3u8_formats(
stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
title = video.get('title') or 'Episode %s' % episode_number
description = video.get('description')
thumbnail = video.get('thumbnail')
timestamp = unified_timestamp(video.get('release_date'))
duration = parse_duration(video.get('duration'))
age_limit = parse_age_limit(video.get('tv_rating'))
series = video.get('series_title')
season_number = int_or_none(video.get('season'))
if series:
title = '%s - %s' % (series, title)
subtitles = self.extract_subtitles(video_id)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit,
'series': series,
'season_number': season_number,
'episode_number': int_or_none(episode_number),
'formats': formats,
'subtitles': subtitles,
}
class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@ -1139,6 +1139,7 @@ from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
TVNowListIE,
TVNowShowIE,
)
from .tvp import (
TVPEmbedIE,

View File

@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE):
vod_id = config.get('vodId') or self._search_regex(
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)

View File

@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor):
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
_TESTS = [
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
'md5': '518c9aa655686cf81493af5cc21e2a04',
'info_dict': {
'id': 'vKInpacll2pC',
@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
'add_ie': ['ThePlatform'],
},
{
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
'md5': 'c4912f656b4cbe58f3e000c489360989',
'info_dict': {
'id': 'Pok5lWCkiEFA',
@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
{
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
'only_matching': True,
}
]

View File

@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@ -43,9 +41,14 @@ class NaverIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
vid = self._search_regex(
r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'video id', fatal=None, group='value')
in_key = self._search_regex(
r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'key', default=None, group='value')
if not vid or not in_key:
error = self._html_search_regex(
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
webpage, 'error', default=None)
@ -53,9 +56,9 @@ class NaverIE(InfoExtractor):
raise ExtractorError(error, expected=True)
raise ExtractorError('couldn\'t extract vid and key')
video_data = self._download_json(
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
video_id, query={
'key': m_id.group(2),
'key': in_key,
})
meta = video_data['meta']
title = meta['subject']

View File

@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',

View File

@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
parse_duration,
try_get,
update_url_query,
)
@ -58,14 +59,22 @@ class TVNowBaseIE(InfoExtractor):
duration = parse_duration(info.get('duration'))
f = info.get('format', {})
thumbnails = [{
'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
}]
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
if thumbnail:
thumbnails.append({
'url': thumbnail,
})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'thumbnails': thumbnails,
'timestamp': timestamp,
'duration': duration,
'series': f.get('title'),
@ -77,7 +86,12 @@ class TVNowBaseIE(InfoExtractor):
class TVNowIE(TVNowBaseIE):
_VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
(?P<show_id>[^/]+)/
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
'''
_TESTS = [{
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
@ -99,27 +113,30 @@ class TVNowIE(TVNowBaseIE):
}, {
# rtl2
'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
'only_matching': 'True',
'only_matching': True,
}, {
# rtlnitro
'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
'only_matching': 'True',
'only_matching': True,
}, {
# superrtl
'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
'only_matching': 'True',
'only_matching': True,
}, {
# ntv
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
'only_matching': 'True',
'only_matching': True,
}, {
# vox
'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
'only_matching': 'True',
'only_matching': True,
}, {
# rtlplus
'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
'only_matching': 'True',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
'only_matching': True,
}]
def _real_extract(self, url):
@ -133,8 +150,30 @@ class TVNowIE(TVNowBaseIE):
return self._extract_video(info, display_id)
class TVNowListIE(TVNowBaseIE):
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
class TVNowListBaseIE(TVNowBaseIE):
_SHOW_VALID_URL = r'''(?x)
(?P<base_url>
https?://
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
(?P<show_id>[^/]+)
)
'''
def _extract_list_info(self, display_id, show_id):
fields = list(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
return self._call_api(
'formats/seo', display_id, query={
'fields': ','.join(fields),
'name': show_id + '.php'
})
class TVNowListIE(TVNowListBaseIE):
_VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
@ -147,38 +186,94 @@ class TVNowListIE(TVNowBaseIE):
'title': '30 Minuten Deutschland - Aktuell',
},
'playlist_mincount': 1,
}, {
'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False if TVNowIE.suitable(url)
else super(TVNowListIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
fields = []
fields.extend(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
list_info = self._call_api(
'formats/seo', season_id, query={
'fields': ','.join(fields),
'name': show_id + '.php'
})
list_info = self._extract_list_info(season_id, show_id)
season = next(
season for season in list_info['formatTabs']['items']
if season.get('seoheadline') == season_id)
title = '%s - %s' % (list_info['title'], season['headline'])
title = list_info.get('title')
headline = season.get('headline')
if title and headline:
title = '%s - %s' % (title, headline)
else:
title = headline or title
entries = []
for container in season['formatTabPages']['items']:
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
items = try_get(
container, lambda x: x['container']['movies']['items'],
list) or []
for info in items:
seo_url = info.get('seoUrl')
if not seo_url:
continue
video_id = info.get('id')
entries.append(self.url_result(
base_url + seo_url + '/player', 'TVNow', info.get('id')))
'%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
compat_str(video_id) if video_id else None))
return self.playlist_result(
entries, compat_str(season.get('id') or season_id), title)
class TVNowShowIE(TVNowListBaseIE):
_VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('id', 'title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
_VIDEO_FIELDS = ()
_TESTS = [{
'url': 'https://www.tvnow.at/vox/ab-ins-beet',
'info_dict': {
'id': 'ab-ins-beet',
'title': 'Ab ins Beet!',
},
'playlist_mincount': 7,
}, {
'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
else super(TVNowShowIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id = re.match(self._VALID_URL, url).groups()
list_info = self._extract_list_info(show_id, show_id)
entries = []
for season_info in list_info['formatTabs']['items']:
season_url = season_info.get('seoheadline')
if not season_url:
continue
season_id = season_info.get('id')
entries.append(self.url_result(
'%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
compat_str(season_id) if season_id else None,
season_info.get('headline')))
return self.playlist_result(entries, show_id, list_info.get('title'))

View File

@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor):
group='title') or self._og_search_title(webpage)
thumbnail = self._search_regex(
r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
(r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
r'url_bigthumb=(?P<thumbnail>.+?)&amp'),
webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.03.26.1'
__version__ = '2018.04.03'