Add bulk support for Go: extract every video ID found on a page instead of only the first.
This commit is contained in:
parent
68fa15155f
commit
8e1b235c0e
@ -1007,6 +1007,40 @@ class InfoExtractor(object):
|
|||||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _search_regex_all(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
    """
    Like _search_regex, but collect every match of every pattern instead
    of stopping at the first one.

    pattern may be a single regex (string or compiled) or an iterable of
    regexes. Each pattern is applied in turn with re.finditer and the
    extracted values are appended to one list, pattern by pattern.

    For each match the extracted value is match.group(group) when group
    is given; otherwise it is the first capture group that is not None.
    A match with no non-None capture group contributes nothing.

    Returns the list of extracted values when it is non-empty. When
    nothing was extracted: returns default if one was supplied, raises
    RegexNotFoundError if fatal is true, and otherwise reports a warning
    through the downloader and returns None.
    """
    # Normalise to an iterable of patterns so one loop serves both the
    # single-pattern and the multi-pattern call styles.
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        patterns = [pattern]
    else:
        patterns = pattern

    ret = []
    for p in patterns:
        for match in re.finditer(p, string, flags):
            if group is not None:
                ret.append(match.group(group))
                continue
            # A bare next() would leak StopIteration to the caller when
            # the match has no non-None capture group; skip such matches
            # so they fall through to the regular "not found" handling.
            value = next((g for g in match.groups() if g is not None), None)
            if value is not None:
                ret.append(value)

    if ret:
        return ret
    if default is not NO_DEFAULT:
        return default

    # Only the failure paths print the name, so build the (possibly
    # colourised) label here rather than on every call.
    if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
        _name = '\033[0;34m%s\033[0m' % name
    else:
        _name = name

    if fatal:
        raise RegexNotFoundError('Unable to extract %s' % _name)
    self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
    return None
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||||
|
@ -132,14 +132,21 @@ class GoIE(AdobePassIE):
|
|||||||
brand = site_info.get('brand')
|
brand = site_info.get('brand')
|
||||||
if not video_id or not site_info:
|
if not video_id or not site_info:
|
||||||
webpage = self._download_webpage(url, display_id or video_id)
|
webpage = self._download_webpage(url, display_id or video_id)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex_all(
|
||||||
(
|
(
|
||||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||||
r'data-video-id=["\']*(VDKA\w+)',
|
r'data-video-id=["\']*(VDKA\w+)',
|
||||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||||
), webpage, 'video id', default=video_id)
|
), webpage, 'video id', default=[video_id])
|
||||||
|
|
||||||
|
# Remove duplicates and nulls
|
||||||
|
if video_id:
|
||||||
|
tmp = []
|
||||||
|
[tmp.append(x) for x in video_id if x and x not in tmp]
|
||||||
|
video_id = tmp
|
||||||
|
|
||||||
if not site_info:
|
if not site_info:
|
||||||
brand = self._search_regex(
|
brand = self._search_regex(
|
||||||
(r'data-brand=\s*["\']\s*(\d+)',
|
(r'data-brand=\s*["\']\s*(\d+)',
|
||||||
@ -160,6 +167,23 @@ class GoIE(AdobePassIE):
|
|||||||
video['url'], 'Go', video.get('id'), video.get('title')))
|
video['url'], 'Go', video.get('id'), video.get('title')))
|
||||||
entries.reverse()
|
entries.reverse()
|
||||||
return self.playlist_result(entries, show_id, show_title)
|
return self.playlist_result(entries, show_id, show_title)
|
||||||
|
|
||||||
|
if not isinstance(video_id, list):
|
||||||
|
video_id = [video_id]
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for id in video_id:
|
||||||
|
entry = self._real_extract_single(id, site_info, brand)
|
||||||
|
if entry:
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
|
if len(entries) == 0:
|
||||||
|
return None
|
||||||
|
elif len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
def _real_extract_single(self, video_id, site_info, brand):
|
||||||
video_data = self._extract_videos(brand, video_id)[0]
|
video_data = self._extract_videos(brand, video_id)[0]
|
||||||
video_id = video_data['id']
|
video_id = video_data['id']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user