Add bulk (multi-video) extraction support to the Go extractor.

2020-01-10 00:31:55 -08:00 · 2020-01-10 00:31:55 -08:00 · 8e1b235c0e
commit 8e1b235c0e
parent 68fa15155f
2 changed files with 60 additions and 2 deletions
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1007,6 +1007,40 @@ class InfoExtractor(object):
            self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
            return None

+    def _search_regex_all(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+        """
+        Like _search_regex, but collect every match of every pattern.
+
+        pattern is one regex (string or compiled) or an iterable of regexes,
+        each scanned over string with re.finditer. Every match contributes
+        match.group(group), or its first non-None group when group is None
+        (StopIteration propagates if that match captured nothing). Returns the
+        list of results; with no match, returns default if given, else raises
+        RegexNotFoundError if fatal, else reports a warning and returns None.
+        """
+        # Wrap a lone pattern so one loop serves both call styles
+        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
+            pattern = (pattern,)
+        ret = []
+        for p in pattern:
+            for match in re.finditer(p, string, flags):
+                if group is None:
+                    ret.append(next(g for g in match.groups() if g is not None))
+                else:
+                    ret.append(match.group(group))
+        if ret:
+            return ret
+        if default is not NO_DEFAULT:
+            return default
+
+        # Nothing matched: colourise the name only now that a message is needed
+        use_color = not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty()
+        _name = ('\033[0;34m%s\033[0m' % name) if use_color else name
+        if fatal:
+            raise RegexNotFoundError('Unable to extract %s' % _name)
+        self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+        return None
+
    def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
        """
        Like _search_regex, but strips HTML tags and unescapes entities.
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -132,14 +132,21 @@ class GoIE(AdobePassIE):
        brand = site_info.get('brand')
        if not video_id or not site_info:
            webpage = self._download_webpage(url, display_id or video_id)
-            video_id = self._search_regex(
+            video_id = self._search_regex_all(
                (
                    # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
                    # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
                    r'data-video-id=["\']*(VDKA\w+)',
                    # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
                    r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
-                ), webpage, 'video id', default=video_id)
+                ), webpage, 'video id', default=[video_id])
+                
+            # Remove duplicates and nulls
+            if video_id:
+                tmp = []
+                [tmp.append(x) for x in video_id if x and x not in tmp]
+                video_id = tmp
+                
            if not site_info:
                brand = self._search_regex(
                    (r'data-brand=\s*["\']\s*(\d+)',
@@ -160,6 +167,23 @@ class GoIE(AdobePassIE):
                        video['url'], 'Go', video.get('id'), video.get('title')))
                entries.reverse()
                return self.playlist_result(entries, show_id, show_title)
+        
+        if not isinstance(video_id, list):
+            video_id = [video_id]
+            
+        entries = []
+        for id in video_id:
+            entry = self._real_extract_single(id, site_info, brand)
+            if entry:
+                entries.append(entry)
+                
+        if len(entries) == 0:
+            return None
+        elif len(entries) == 1:
+            return entries[0]
+        return self.playlist_result(entries)
+        
+    def _real_extract_single(self, video_id, site_info, brand):
        video_data = self._extract_videos(brand, video_id)[0]
        video_id = video_data['id']
        title = video_data['title']