[AnimeLab] Extract both English and Japanese all in one go, if available

2020-04-23 21:03:20 +10:00 · 2020-04-23 21:03:20 +10:00 · 2f6d029e96
commit 2f6d029e96
parent c3dca171d0
1 changed file with 91 additions and 83 deletions
--- a/youtube_dl/extractor/animelab.py
+++ b/youtube_dl/extractor/animelab.py
@@ -94,110 +94,118 @@ class AnimeLabIE(AnimeLabBaseIE):
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
+        # unfortunately we can get different URLs for the same formats
        # e.g. if we are using a "free" account so no dubs available
        # (so _remove_duplicate_formats is not effective)
        # so we use a dictionary as a workaround
        formats = {}
        for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
                                    'https://www.animelab.com/player/%s/dubbed'):
            actual_url = language_option_url % display_id
            webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
-        video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
+            video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
-        position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
+            position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
-        raw_data = video_collection[position]['videoEntry']
+            raw_data = video_collection[position]['videoEntry']
-        video_id = str_or_none(raw_data['id'])
+            video_id = str_or_none(raw_data['id'])
-        # create a title from many sources (while grabbing other info)
+            # create a title from many sources (while grabbing other info)
-        # TODO use more fallback sources to get some of these
+            # TODO use more fallback sources to get some of these
-        series = raw_data.get('showTitle')
+            series = raw_data.get('showTitle')
-        video_type = raw_data.get('videoEntryType', {}).get('name')
+            video_type = raw_data.get('videoEntryType', {}).get('name')
-        episode_number = raw_data.get('episodeNumber')
+            episode_number = raw_data.get('episodeNumber')
-        episode_name = raw_data.get('name')
+            episode_name = raw_data.get('name')
-        title_parts = (series, video_type, episode_number, episode_name)
+            title_parts = (series, video_type, episode_number, episode_name)
-        if None not in title_parts:
+            if None not in title_parts:
-            title = '%s - %s %s - %s' % title_parts
+                title = '%s - %s %s - %s' % title_parts
-        else:
+            else:
-            title = episode_name
+                title = episode_name
-        description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
+            description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
-        duration = int_or_none(raw_data.get('duration'))
+            duration = int_or_none(raw_data.get('duration'))
-        thumbnail_data = raw_data.get('images', [])
+            thumbnail_data = raw_data.get('images', [])
-        thumbnails = []
+            thumbnails = []
-        for thumbnail in thumbnail_data:
+            for thumbnail in thumbnail_data:
-            for instance in thumbnail['imageInstances']:
+                for instance in thumbnail['imageInstances']:
-                image_data = instance.get('imageInfo', {})
+                    image_data = instance.get('imageInfo', {})
-                thumbnails.append({
+                    thumbnails.append({
-                    'id': str_or_none(image_data.get('id')),
+                        'id': str_or_none(image_data.get('id')),
-                    'url': image_data.get('fullPath'),
+                        'url': image_data.get('fullPath'),
-                    'width': image_data.get('width'),
+                        'width': image_data.get('width'),
-                    'height': image_data.get('height'),
+                        'height': image_data.get('height'),
-                })
+                    })
-        season_data = raw_data.get('season', {})
+            season_data = raw_data.get('season', {})
-        season = str_or_none(season_data.get('name'))
+            season = str_or_none(season_data.get('name'))
-        season_number = int_or_none(season_data.get('seasonNumber'))
+            season_number = int_or_none(season_data.get('seasonNumber'))
-        season_id = str_or_none(season_data.get('id'))
+            season_id = str_or_none(season_data.get('id'))
-        formats = []
+            for video_data in raw_data['videoList']:
-        for video_data in raw_data['videoList']:
+                current_video_list = {}
-            current_video_list = {}
+                current_video_list['language'] = video_data.get('language', {}).get('languageCode')
            current_video_list['language'] = video_data.get('language', {}).get('languageCode')
-            is_hardsubbed = video_data.get('hardSubbed')
+                is_hardsubbed = video_data.get('hardSubbed')
-            for video_instance in video_data['videoInstances']:
+                for video_instance in video_data['videoInstances']:
-                httpurl = video_instance.get('httpUrl')
+                    httpurl = video_instance.get('httpUrl')
-                url = httpurl if httpurl else video_instance.get('rtmpUrl')
+                    url = httpurl if httpurl else video_instance.get('rtmpUrl')
-                if url is None:
+                    if url is None:
-                    # this video format is unavailable to the user (not premium etc.)
+                        # this video format is unavailable to the user (not premium etc.)
-                    continue
+                        continue
-                current_format = current_video_list.copy()
+                    current_format = current_video_list.copy()
-                format_id_parts = []
+                    format_id_parts = []
-                format_id_parts.append(str_or_none(video_instance.get('id')))
+                    format_id_parts.append(str_or_none(video_instance.get('id')))
-                if is_hardsubbed is not None:
+                    if is_hardsubbed is not None:
-                    if is_hardsubbed:
+                        if is_hardsubbed:
-                        format_id_parts.append('yeshardsubbed')
+                            format_id_parts.append('yeshardsubbed')
                        else:
                            format_id_parts.append('nothardsubbed')
                    format_id_parts.append(current_format['language'])
                    format_id = '_'.join([x for x in format_id_parts if x is not None])
                    ext = determine_ext(url)
                    if ext == 'm3u8':
                        for format_ in self._extract_m3u8_formats(
                                url, video_id, m3u8_id=format_id, fatal=False):
                            formats[format_['format_id']] = format_
                        continue
                    elif ext == 'mpd':
                        for format_ in self._extract_mpd_formats(
                                url, video_id, mpd_id=format_id, fatal=False):
                            formats[format_['format_id']] = format_
                        continue
                    current_format['url'] = url
                    quality_data = video_instance.get('videoQuality')
                    if quality_data:
                        quality = quality_data.get('name') or quality_data.get('description')
                    else:
-                        format_id_parts.append('nothardsubbed')
+                        quality = None
-                format_id_parts.append(current_format['language'])
+                    height = None
                    if quality:
                        height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
-                format_id = '_'.join([x for x in format_id_parts if x is not None])
+                    if height is None:
                        self.report_warning('Could not get height of video')
                    else:
                        current_format['height'] = height
                    current_format['format_id'] = format_id
-                ext = determine_ext(url)
+                    formats[current_format['format_id']] = current_format
                if ext == 'm3u8':
                    m3u8_formats = self._extract_m3u8_formats(
                        url, video_id, m3u8_id=format_id, fatal=False)
                    formats.extend(m3u8_formats)
                    continue
                elif ext == 'mpd':
                    mpd_formats = self._extract_mpd_formats(
                        url, video_id, mpd_id=format_id, fatal=False)
                    formats.extend(mpd_formats)
                    continue
                current_format['url'] = url
                quality_data = video_instance.get('videoQuality')
                if quality_data:
                    quality = quality_data.get('name') or quality_data.get('description')
                else:
                    quality = None
                height = None
                if quality:
                    height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
                if height is None:
                    self.report_warning('Could not get height of video')
                else:
                    current_format['height'] = height
                current_format['format_id'] = format_id
                formats.append(current_format)
        formats = list(formats.values())
        self._sort_formats(formats)
        return {