Add bulk support for go.

This commit is contained in:
LexManos 2020-01-10 00:31:55 -08:00
parent 68fa15155f
commit 8e1b235c0e
2 changed files with 60 additions and 2 deletions

View File

@ -1007,6 +1007,40 @@ class InfoExtractor(object):
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
def _search_regex_all(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
    """
    Like _search_regex, but collect every match of every pattern.

    pattern may be a single regex (string or compiled) or an iterable of
    them. Patterns are tried in order and all of their matches in string
    are gathered, in that order, into one list.

    For each match the value of `group` is taken when given, otherwise the
    first capture group that participated in the match. Matches in which
    no capture group participated are skipped.

    Returns the list of extracted values when at least one was found.
    Otherwise returns `default` if one was supplied, raises
    RegexNotFoundError if `fatal` is true, or reports a warning through
    the downloader and returns None.
    """
    # Normalise to an iterable of patterns so one loop serves both cases.
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        patterns = (pattern,)
    else:
        patterns = pattern

    ret = []
    for p in patterns:
        for match in re.finditer(p, string, flags):
            if group is not None:
                ret.append(match.group(group))
                continue
            # next() without a default would leak a bare StopIteration when
            # none of the groups took part in this match.
            value = next((g for g in match.groups() if g is not None), None)
            if value is not None:
                ret.append(value)

    if ret:
        return ret
    if default is not NO_DEFAULT:
        return default

    # Only the failure paths need the (possibly colourised) field name.
    if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
        _name = '\033[0;34m%s\033[0m' % name
    else:
        _name = name
    if fatal:
        raise RegexNotFoundError('Unable to extract %s' % _name)
    self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
    return None
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.

View File

@ -132,14 +132,21 @@ class GoIE(AdobePassIE):
brand = site_info.get('brand')
if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex(
video_id = self._search_regex_all(
(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)',
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
), webpage, 'video id', default=video_id)
), webpage, 'video id', default=[video_id])
# Remove duplicates and nulls
if video_id:
tmp = []
[tmp.append(x) for x in video_id if x and x not in tmp]
video_id = tmp
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
@ -160,6 +167,23 @@ class GoIE(AdobePassIE):
video['url'], 'Go', video.get('id'), video.get('title')))
entries.reverse()
return self.playlist_result(entries, show_id, show_title)
if not isinstance(video_id, list):
video_id = [video_id]
entries = []
for id in video_id:
entry = self._real_extract_single(id, site_info, brand)
if entry:
entries.append(entry)
if len(entries) == 0:
return None
elif len(entries) == 1:
return entries[0]
return self.playlist_result(entries)
def _real_extract_single(self, video_id, site_info, brand):
video_data = self._extract_videos(brand, video_id)[0]
video_id = video_data['id']
title = video_data['title']