From 2f6d029e96aa2d13b2671af35b06309a91459091 Mon Sep 17 00:00:00 2001
From: Mariusz Skoneczko <mariusz@skoneczko.com>
Date: Thu, 23 Apr 2020 21:03:20 +1000
Subject: [PATCH] [AnimeLab] Extract both English and Japanese all in one go,
 if available

---
 youtube_dl/extractor/animelab.py | 174 ++++++++++++++++---------------
 1 file changed, 91 insertions(+), 83 deletions(-)

diff --git a/youtube_dl/extractor/animelab.py b/youtube_dl/extractor/animelab.py
index 0bd599935..f6ea5a107 100644
--- a/youtube_dl/extractor/animelab.py
+++ b/youtube_dl/extractor/animelab.py
@@ -94,110 +94,118 @@ class AnimeLabIE(AnimeLabBaseIE):
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
+        # Unfortunately, the same formats can be served under different URLs
+        # (e.g. with a "free" account, where no dubs are available), so
+        # _remove_duplicate_formats is not effective here. As a workaround,
+        # collect the formats in a dictionary keyed by format_id instead.
+        formats = {}
+        for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
+                                    'https://www.animelab.com/player/%s/dubbed'):
+            actual_url = language_option_url % display_id
+            webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
 
-        video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
-        position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
+            video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
+            position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
 
-        raw_data = video_collection[position]['videoEntry']
+            raw_data = video_collection[position]['videoEntry']
 
-        video_id = str_or_none(raw_data['id'])
+            video_id = str_or_none(raw_data['id'])
 
-        # create a title from many sources (while grabbing other info)
-        # TODO use more fallback sources to get some of these
-        series = raw_data.get('showTitle')
-        video_type = raw_data.get('videoEntryType', {}).get('name')
-        episode_number = raw_data.get('episodeNumber')
-        episode_name = raw_data.get('name')
+            # create a title from many sources (while grabbing other info)
+            # TODO use more fallback sources to get some of these
+            series = raw_data.get('showTitle')
+            video_type = raw_data.get('videoEntryType', {}).get('name')
+            episode_number = raw_data.get('episodeNumber')
+            episode_name = raw_data.get('name')
 
-        title_parts = (series, video_type, episode_number, episode_name)
-        if None not in title_parts:
-            title = '%s - %s %s - %s' % title_parts
-        else:
-            title = episode_name
+            title_parts = (series, video_type, episode_number, episode_name)
+            if None not in title_parts:
+                title = '%s - %s %s - %s' % title_parts
+            else:
+                title = episode_name
 
-        description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
+            description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
 
-        duration = int_or_none(raw_data.get('duration'))
+            duration = int_or_none(raw_data.get('duration'))
 
-        thumbnail_data = raw_data.get('images', [])
-        thumbnails = []
-        for thumbnail in thumbnail_data:
-            for instance in thumbnail['imageInstances']:
-                image_data = instance.get('imageInfo', {})
-                thumbnails.append({
-                    'id': str_or_none(image_data.get('id')),
-                    'url': image_data.get('fullPath'),
-                    'width': image_data.get('width'),
-                    'height': image_data.get('height'),
-                })
+            thumbnail_data = raw_data.get('images', [])
+            thumbnails = []
+            for thumbnail in thumbnail_data:
+                for instance in thumbnail['imageInstances']:
+                    image_data = instance.get('imageInfo', {})
+                    thumbnails.append({
+                        'id': str_or_none(image_data.get('id')),
+                        'url': image_data.get('fullPath'),
+                        'width': image_data.get('width'),
+                        'height': image_data.get('height'),
+                    })
 
-        season_data = raw_data.get('season', {})
-        season = str_or_none(season_data.get('name'))
-        season_number = int_or_none(season_data.get('seasonNumber'))
-        season_id = str_or_none(season_data.get('id'))
+            season_data = raw_data.get('season', {})
+            season = str_or_none(season_data.get('name'))
+            season_number = int_or_none(season_data.get('seasonNumber'))
+            season_id = str_or_none(season_data.get('id'))
 
-        formats = []
-        for video_data in raw_data['videoList']:
-            current_video_list = {}
-            current_video_list['language'] = video_data.get('language', {}).get('languageCode')
+            for video_data in raw_data['videoList']:
+                current_video_list = {}
+                current_video_list['language'] = video_data.get('language', {}).get('languageCode')
 
-            is_hardsubbed = video_data.get('hardSubbed')
+                is_hardsubbed = video_data.get('hardSubbed')
 
-            for video_instance in video_data['videoInstances']:
-                httpurl = video_instance.get('httpUrl')
-                url = httpurl if httpurl else video_instance.get('rtmpUrl')
-                if url is None:
-                    # this video format is unavailable to the user (not premium etc.)
-                    continue
+                for video_instance in video_data['videoInstances']:
+                    httpurl = video_instance.get('httpUrl')
+                    url = httpurl if httpurl else video_instance.get('rtmpUrl')
+                    if url is None:
+                        # this video format is unavailable to the user (not premium etc.)
+                        continue
 
-                current_format = current_video_list.copy()
+                    current_format = current_video_list.copy()
 
-                format_id_parts = []
+                    format_id_parts = []
 
-                format_id_parts.append(str_or_none(video_instance.get('id')))
+                    format_id_parts.append(str_or_none(video_instance.get('id')))
 
-                if is_hardsubbed is not None:
-                    if is_hardsubbed:
-                        format_id_parts.append('yeshardsubbed')
+                    if is_hardsubbed is not None:
+                        if is_hardsubbed:
+                            format_id_parts.append('yeshardsubbed')
+                        else:
+                            format_id_parts.append('nothardsubbed')
+
+                    format_id_parts.append(current_format['language'])
+
+                    format_id = '_'.join([x for x in format_id_parts if x is not None])
+
+                    ext = determine_ext(url)
+                    if ext == 'm3u8':
+                        for format_ in self._extract_m3u8_formats(
+                                url, video_id, m3u8_id=format_id, fatal=False):
+                            formats[format_['format_id']] = format_
+                        continue
+                    elif ext == 'mpd':
+                        for format_ in self._extract_mpd_formats(
+                                url, video_id, mpd_id=format_id, fatal=False):
+                            formats[format_['format_id']] = format_
+                        continue
+
+                    current_format['url'] = url
+                    quality_data = video_instance.get('videoQuality')
+                    if quality_data:
+                        quality = quality_data.get('name') or quality_data.get('description')
                     else:
-                        format_id_parts.append('nothardsubbed')
+                        quality = None
 
-                format_id_parts.append(current_format['language'])
+                    height = None
+                    if quality:
+                        height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
 
-                format_id = '_'.join([x for x in format_id_parts if x is not None])
+                    if height is None:
+                        self.report_warning('Could not get height of video')
+                    else:
+                        current_format['height'] = height
+                    current_format['format_id'] = format_id
 
-                ext = determine_ext(url)
-                if ext == 'm3u8':
-                    m3u8_formats = self._extract_m3u8_formats(
-                        url, video_id, m3u8_id=format_id, fatal=False)
-                    formats.extend(m3u8_formats)
-                    continue
-                elif ext == 'mpd':
-                    mpd_formats = self._extract_mpd_formats(
-                        url, video_id, mpd_id=format_id, fatal=False)
-                    formats.extend(mpd_formats)
-                    continue
-
-                current_format['url'] = url
-                quality_data = video_instance.get('videoQuality')
-                if quality_data:
-                    quality = quality_data.get('name') or quality_data.get('description')
-                else:
-                    quality = None
-
-                height = None
-                if quality:
-                    height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
-
-                if height is None:
-                    self.report_warning('Could not get height of video')
-                else:
-                    current_format['height'] = height
-                current_format['format_id'] = format_id
-
-                formats.append(current_format)
+                    formats[current_format['format_id']] = current_format
 
+        formats = list(formats.values())
         self._sort_formats(formats)
 
         return {