Merge branch 'master' of git@github.com:rg3/youtube-dl.git into extractor_20detik

This commit is contained in:
Surya Oktafendri 2018-04-03 10:09:03 +07:00
commit c4877790da
No known key found for this signature in database
GPG Key ID: 8CAB076E32F1FC8D
13 changed files with 275 additions and 72 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.03.26.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.03.26.1**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.03**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.03.26.1
[debug] youtube-dl version 2018.04.03
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -1,3 +1,20 @@
version 2018.04.03
Extractors
+ [tvnow] Add support for shows (#15837)
* [dramafever] Fix authentication (#16067)
* [afreecatv] Use partial view only when necessary (#14450)
+ [afreecatv] Add support for authentication (#14450)
+ [nationalgeographic] Add support for new URL schema (#16001, #16054)
* [xvideos] Fix thumbnail extraction (#15978, #15979)
* [medialaan] Fix vod id (#16038)
+ [openload] Add support for oload.site (#16039)
* [naver] Fix extraction (#16029)
* [dramafever] Partially switch to API v5 (#16026)
* [abc:iview] Unescape title and series meta fields (#15994)
* [videa] Extend URL regular expression (#16003)
version 2018.03.26.1
Core

View File

@ -223,7 +223,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
## Filesystem Options:
-a, --batch-file FILE File containing URLs to download ('-' for
stdin)
stdin), one URL per line. Lines starting
with '#', ';' or ']' are considered as
comments and ignored.
--id Use only video ID in file name
-o, --output TEMPLATE Output filename template, see the "OUTPUT
TEMPLATE" for all the info

View File

@ -887,6 +887,7 @@
- **TVNoe**
- **TVNow**
- **TVNowList**
- **TVNowShow**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**

View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
urlencode_postdata,
xpath_text,
)
@ -28,6 +29,7 @@ class AfreecaTVIE(InfoExtractor):
)
(?P<id>\d+)
'''
_NETRC_MACHINE = 'afreecatv'
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@ -139,22 +141,22 @@ class AfreecaTVIE(InfoExtractor):
'skip_download': True,
},
}, {
# adult video
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
# PARTIAL_ADULT
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
'info_dict': {
'id': '20171001_F1AE1711_196617479_1',
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
'title': '[생]서아 초심 찾기 방송 (part 1)',
'title': '[생]빨개요♥ (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'BJ서아',
'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20171001',
'duration': 3600,
'age_limit': 18,
'upload_date': '20180327',
'duration': 3601,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['adult content'],
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
@ -172,6 +174,51 @@ class AfreecaTVIE(InfoExtractor):
video_key['part'] = int(m.group('part'))
return video_key
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
login_form = {
'szWork': 'login',
'szType': 'json',
'szUid': username,
'szPassword': password,
'isSaveId': 'false',
'szScriptVar': 'oLoginRet',
'szAction': '',
}
response = self._download_json(
'https://login.afreecatv.com/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form))
_ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-6: 'https://login.afreecatv.com/membership/changeMember.php',
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
-12: 'https://member.afreecatv.com/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.',
}
result = int_or_none(response.get('RESULT'))
if result != 1:
error = _ERRORS.get(result, 'You have failed to log in.')
raise ExtractorError(
'Unable to login: %s said: %s' % (self.IE_NAME, error),
expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
@ -187,22 +234,42 @@ class AfreecaTVIE(InfoExtractor):
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
print(video_id, station_id, bbs_id)
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, headers={
'Referer': url,
}, query={
partial_view = False
for _ in range(2):
query = {
'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
'partialView': 'SKIP_ADULT',
})
}
if partial_view:
query['partialView'] = 'SKIP_ADULT'
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, 'Downloading video info XML%s'
% (' (skipping adult)' if partial_view else ''),
video_id, headers={
'Referer': url,
}, query=query)
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag != 'SUCCEED':
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag == 'SUCCEED':
break
if flag == 'PARTIAL_ADULT':
self._downloader.report_warning(
'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
'Only content suitable for all ages will be downloaded. '
'Provide account credentials if you wish to download restricted content.')
partial_view = True
continue
elif flag == 'ADULT':
error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
else:
error = flag
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
'%s said: %s' % (self.IE_NAME, error), expected=True)
else:
raise ExtractorError('Unable to download video info')
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:

View File

@ -117,9 +117,9 @@ class BiliBiliIE(InfoExtractor):
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
default=None
) or compat_parse_qs(self._search_regex(
[r'1EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'1EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
r'1<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))['cid'][0]
else:
if 'no_bangumi_tip' not in smuggled_data:

View File

@ -2,9 +2,11 @@
from __future__ import unicode_literals
import itertools
import json
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urlparse,
)
@ -14,14 +16,11 @@ from ..utils import (
int_or_none,
parse_age_limit,
parse_duration,
sanitized_Request,
unified_timestamp,
urlencode_postdata
)
class DramaFeverBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@ -39,8 +38,8 @@ class DramaFeverBaseIE(InfoExtractor):
'consumer secret', default=self._CONSUMER_SECRET)
def _real_initialize(self):
self._login()
self._consumer_secret = self._get_consumer_secret()
self._login()
def _login(self):
(username, password) = self._get_login_info()
@ -52,19 +51,29 @@ class DramaFeverBaseIE(InfoExtractor):
'password': password,
}
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
response = self._download_webpage(
request, None, 'Logging in')
try:
response = self._download_json(
'https://www.dramafever.com/api/users/login', None, 'Logging in',
data=json.dumps(login_form).encode('utf-8'), headers={
'x-consumer-key': self._consumer_secret,
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
response = self._parse_json(
e.cause.read().decode('utf-8'), None)
else:
raise
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
# Successful login
if response.get('result') or response.get('guid') or response.get('user_guid'):
return
errors = response.get('errors')
if errors and isinstance(errors, list):
error = errors[0]
message = error.get('message') or error['reason']
raise ExtractorError('Unable to login: %s' % message, expected=True)
raise ExtractorError('Unable to log in')
class DramaFeverIE(DramaFeverBaseIE):

View File

@ -1137,6 +1137,7 @@ from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
TVNowListIE,
TVNowShowIE,
)
from .tvp import (
TVPEmbedIE,

View File

@ -141,6 +141,7 @@ class MedialaanIE(GigyaBaseIE):
vod_id = config.get('vodId') or self._search_regex(
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)

View File

@ -68,11 +68,11 @@ class NationalGeographicVideoIE(InfoExtractor):
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:wild/)?[^/]+/)?(?:videos|episodes)/(?P<id>[^/?]+)'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
_TESTS = [
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
'md5': '518c9aa655686cf81493af5cc21e2a04',
'info_dict': {
'id': 'vKInpacll2pC',
@ -86,7 +86,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
'add_ie': ['ThePlatform'],
},
{
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
'md5': 'c4912f656b4cbe58f3e000c489360989',
'info_dict': {
'id': 'Pok5lWCkiEFA',
@ -106,6 +106,14 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
{
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
'only_matching': True,
},
{
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
'only_matching': True,
}
]

View File

@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
parse_duration,
try_get,
update_url_query,
)
@ -58,14 +59,22 @@ class TVNowBaseIE(InfoExtractor):
duration = parse_duration(info.get('duration'))
f = info.get('format', {})
thumbnails = [{
'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
}]
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
if thumbnail:
thumbnails.append({
'url': thumbnail,
})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'thumbnails': thumbnails,
'timestamp': timestamp,
'duration': duration,
'series': f.get('title'),
@ -77,7 +86,12 @@ class TVNowBaseIE(InfoExtractor):
class TVNowIE(TVNowBaseIE):
_VALID_URL = r'https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
(?P<show_id>[^/]+)/
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
'''
_TESTS = [{
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
@ -99,27 +113,30 @@ class TVNowIE(TVNowBaseIE):
}, {
# rtl2
'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
'only_matching': 'True',
'only_matching': True,
}, {
# rtlnitro
'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
'only_matching': 'True',
'only_matching': True,
}, {
# superrtl
'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
'only_matching': 'True',
'only_matching': True,
}, {
# ntv
'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
'only_matching': 'True',
'only_matching': True,
}, {
# vox
'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
'only_matching': 'True',
'only_matching': True,
}, {
# rtlplus
'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
'only_matching': 'True',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
'only_matching': True,
}]
def _real_extract(self, url):
@ -133,8 +150,30 @@ class TVNowIE(TVNowBaseIE):
return self._extract_video(info, display_id)
class TVNowListIE(TVNowBaseIE):
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?tvnow\.(?:de|at|ch)/(?:rtl(?:2|plus)?|nitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/)list/(?P<id>[^?/#&]+)$'
class TVNowListBaseIE(TVNowBaseIE):
_SHOW_VALID_URL = r'''(?x)
(?P<base_url>
https?://
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
(?P<show_id>[^/]+)
)
'''
def _extract_list_info(self, display_id, show_id):
fields = list(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
return self._call_api(
'formats/seo', display_id, query={
'fields': ','.join(fields),
'name': show_id + '.php'
})
class TVNowListIE(TVNowListBaseIE):
_VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
@ -147,38 +186,94 @@ class TVNowListIE(TVNowBaseIE):
'title': '30 Minuten Deutschland - Aktuell',
},
'playlist_mincount': 1,
}, {
'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False if TVNowIE.suitable(url)
else super(TVNowListIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
fields = []
fields.extend(self._SHOW_FIELDS)
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
fields.extend(
'formatTabs.formatTabPages.container.movies.%s' % field
for field in self._VIDEO_FIELDS)
list_info = self._call_api(
'formats/seo', season_id, query={
'fields': ','.join(fields),
'name': show_id + '.php'
})
list_info = self._extract_list_info(season_id, show_id)
season = next(
season for season in list_info['formatTabs']['items']
if season.get('seoheadline') == season_id)
title = '%s - %s' % (list_info['title'], season['headline'])
title = list_info.get('title')
headline = season.get('headline')
if title and headline:
title = '%s - %s' % (title, headline)
else:
title = headline or title
entries = []
for container in season['formatTabPages']['items']:
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
items = try_get(
container, lambda x: x['container']['movies']['items'],
list) or []
for info in items:
seo_url = info.get('seoUrl')
if not seo_url:
continue
video_id = info.get('id')
entries.append(self.url_result(
base_url + seo_url + '/player', 'TVNow', info.get('id')))
'%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
compat_str(video_id) if video_id else None))
return self.playlist_result(
entries, compat_str(season.get('id') or season_id), title)
class TVNowShowIE(TVNowListBaseIE):
_VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
_SHOW_FIELDS = ('id', 'title', )
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
_VIDEO_FIELDS = ()
_TESTS = [{
'url': 'https://www.tvnow.at/vox/ab-ins-beet',
'info_dict': {
'id': 'ab-ins-beet',
'title': 'Ab ins Beet!',
},
'playlist_mincount': 7,
}, {
'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
'only_matching': True,
}, {
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
else super(TVNowShowIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id = re.match(self._VALID_URL, url).groups()
list_info = self._extract_list_info(show_id, show_id)
entries = []
for season_info in list_info['formatTabs']['items']:
season_url = season_info.get('seoheadline')
if not season_url:
continue
season_id = season_info.get('id')
entries.append(self.url_result(
'%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
compat_str(season_id) if season_id else None,
season_info.get('headline')))
return self.playlist_result(entries, show_id, list_info.get('title'))

View File

@ -58,7 +58,9 @@ class XVideosIE(InfoExtractor):
group='title') or self._og_search_title(webpage)
thumbnail = self._search_regex(
r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
(r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
r'url_bigthumb=(?P<thumbnail>.+?)&amp'),
webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.03.26.1'
__version__ = '2018.04.03'