From 2f6d029e96aa2d13b2671af35b06309a91459091 Mon Sep 17 00:00:00 2001 From: Mariusz Skoneczko Date: Thu, 23 Apr 2020 21:03:20 +1000 Subject: [PATCH] [AnimeLab] Extract both English and Japanese all in one go, if available --- youtube_dl/extractor/animelab.py | 174 ++++++++++++++++--------------- 1 file changed, 91 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/animelab.py b/youtube_dl/extractor/animelab.py index 0bd599935..f6ea5a107 100644 --- a/youtube_dl/extractor/animelab.py +++ b/youtube_dl/extractor/animelab.py @@ -94,110 +94,118 @@ class AnimeLabIE(AnimeLabBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id, 'Downloading requested URL') + # unfortunately we can get different URLs for the same formats + # e.g. if we are using a "free" account so no dubs available + # (so _remove_duplicate_formats is not effective) + # so we use a dictionary as a workaround + formats = {} + for language_option_url in ('https://www.animelab.com/player/%s/subtitles', + 'https://www.animelab.com/player/%s/dubbed'): + actual_url = language_option_url % display_id + webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url) - video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id) - position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position')) + video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id) + position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position')) - raw_data = video_collection[position]['videoEntry'] + raw_data = video_collection[position]['videoEntry'] - video_id = str_or_none(raw_data['id']) + video_id = str_or_none(raw_data['id']) - # create a title from many sources (while grabbing other info) - # TODO use more fallback sources to get some of these - series = raw_data.get('showTitle') - video_type = raw_data.get('videoEntryType', {}).get('name') - episode_number = raw_data.get('episodeNumber') - episode_name = raw_data.get('name') + # create a title from many sources (while grabbing other info) + # TODO use more fallback sources to get some of these + series = raw_data.get('showTitle') + video_type = raw_data.get('videoEntryType', {}).get('name') + episode_number = raw_data.get('episodeNumber') + episode_name = raw_data.get('name') - title_parts = (series, video_type, episode_number, episode_name) - if None not in title_parts: - title = '%s - %s %s - %s' % title_parts - else: - title = episode_name + title_parts = (series, video_type, episode_number, episode_name) + if None not in title_parts: + title = '%s - %s %s - %s' % title_parts + else: + title = episode_name - description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None) + description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None) - duration = int_or_none(raw_data.get('duration')) + duration = int_or_none(raw_data.get('duration')) - thumbnail_data = raw_data.get('images', []) - thumbnails = [] - for thumbnail in thumbnail_data: - for instance in thumbnail['imageInstances']: - image_data = instance.get('imageInfo', {}) - thumbnails.append({ - 'id': str_or_none(image_data.get('id')), - 'url': image_data.get('fullPath'), - 'width': image_data.get('width'), - 'height': image_data.get('height'), - }) + thumbnail_data = raw_data.get('images', []) + thumbnails = [] + for thumbnail in thumbnail_data: + for instance in thumbnail['imageInstances']: + image_data = instance.get('imageInfo', {}) + thumbnails.append({ + 'id': str_or_none(image_data.get('id')), + 'url': image_data.get('fullPath'), + 'width': image_data.get('width'), + 'height': image_data.get('height'), + }) - season_data = raw_data.get('season', {}) - season = str_or_none(season_data.get('name')) - season_number = int_or_none(season_data.get('seasonNumber')) - season_id = str_or_none(season_data.get('id')) + season_data = raw_data.get('season', {}) + season = str_or_none(season_data.get('name')) + season_number = int_or_none(season_data.get('seasonNumber')) + season_id = str_or_none(season_data.get('id')) - formats = [] - for video_data in raw_data['videoList']: - current_video_list = {} - current_video_list['language'] = video_data.get('language', {}).get('languageCode') + for video_data in raw_data['videoList']: + current_video_list = {} + current_video_list['language'] = video_data.get('language', {}).get('languageCode') - is_hardsubbed = video_data.get('hardSubbed') + is_hardsubbed = video_data.get('hardSubbed') - for video_instance in video_data['videoInstances']: - httpurl = video_instance.get('httpUrl') - url = httpurl if httpurl else video_instance.get('rtmpUrl') - if url is None: - # this video format is unavailable to the user (not premium etc.) - continue + for video_instance in video_data['videoInstances']: + httpurl = video_instance.get('httpUrl') + url = httpurl if httpurl else video_instance.get('rtmpUrl') + if url is None: + # this video format is unavailable to the user (not premium etc.) + continue - current_format = current_video_list.copy() + current_format = current_video_list.copy() - format_id_parts = [] + format_id_parts = [] - format_id_parts.append(str_or_none(video_instance.get('id'))) + format_id_parts.append(str_or_none(video_instance.get('id'))) - if is_hardsubbed is not None: - if is_hardsubbed: - format_id_parts.append('yeshardsubbed') + if is_hardsubbed is not None: + if is_hardsubbed: + format_id_parts.append('yeshardsubbed') + else: + format_id_parts.append('nothardsubbed') + + format_id_parts.append(current_format['language']) + + format_id = '_'.join([x for x in format_id_parts if x is not None]) + + ext = determine_ext(url) + if ext == 'm3u8': + for format_ in self._extract_m3u8_formats( + url, video_id, m3u8_id=format_id, fatal=False): + formats[format_['format_id']] = format_ + continue + elif ext == 'mpd': + for format_ in self._extract_mpd_formats( + url, video_id, mpd_id=format_id, fatal=False): + formats[format_['format_id']] = format_ + continue + + current_format['url'] = url + quality_data = video_instance.get('videoQuality') + if quality_data: + quality = quality_data.get('name') or quality_data.get('description') else: - format_id_parts.append('nothardsubbed') + quality = None - format_id_parts.append(current_format['language']) + height = None + if quality: + height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None)) - format_id = '_'.join([x for x in format_id_parts if x is not None]) + if height is None: + self.report_warning('Could not get height of video') + else: + current_format['height'] = height + current_format['format_id'] = format_id - ext = determine_ext(url) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - url, video_id, m3u8_id=format_id, fatal=False) - formats.extend(m3u8_formats) - continue - elif ext == 'mpd': - mpd_formats = self._extract_mpd_formats( - url, video_id, mpd_id=format_id, fatal=False) - formats.extend(mpd_formats) - continue - - current_format['url'] = url - quality_data = video_instance.get('videoQuality') - if quality_data: - quality = quality_data.get('name') or quality_data.get('description') - else: - quality = None - - height = None - if quality: - height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None)) - - if height is None: - self.report_warning('Could not get height of video') - else: - current_format['height'] = height - current_format['format_id'] = format_id - - formats.append(current_format) + formats[current_format['format_id']] = current_format + formats = list(formats.values()) self._sort_formats(formats) return {