[AnimeLab] Extract both English and Japanese all in one go, if available

This commit is contained in:
Mariusz Skoneczko 2020-04-23 21:03:20 +10:00
parent c3dca171d0
commit 2f6d029e96

View File

@ -94,110 +94,118 @@ class AnimeLabIE(AnimeLabBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id, 'Downloading requested URL') # unfortunately we can get different URLs for the same formats
# e.g. if we are using a "free" account so no dubs available
# (so _remove_duplicate_formats is not effective)
# so we use a dictionary as a workaround
formats = {}
for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
'https://www.animelab.com/player/%s/dubbed'):
actual_url = language_option_url % display_id
webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id) video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position')) position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
raw_data = video_collection[position]['videoEntry'] raw_data = video_collection[position]['videoEntry']
video_id = str_or_none(raw_data['id']) video_id = str_or_none(raw_data['id'])
# create a title from many sources (while grabbing other info) # create a title from many sources (while grabbing other info)
# TODO use more fallback sources to get some of these # TODO use more fallback sources to get some of these
series = raw_data.get('showTitle') series = raw_data.get('showTitle')
video_type = raw_data.get('videoEntryType', {}).get('name') video_type = raw_data.get('videoEntryType', {}).get('name')
episode_number = raw_data.get('episodeNumber') episode_number = raw_data.get('episodeNumber')
episode_name = raw_data.get('name') episode_name = raw_data.get('name')
title_parts = (series, video_type, episode_number, episode_name) title_parts = (series, video_type, episode_number, episode_name)
if None not in title_parts: if None not in title_parts:
title = '%s - %s %s - %s' % title_parts title = '%s - %s %s - %s' % title_parts
else: else:
title = episode_name title = episode_name
description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None) description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
duration = int_or_none(raw_data.get('duration')) duration = int_or_none(raw_data.get('duration'))
thumbnail_data = raw_data.get('images', []) thumbnail_data = raw_data.get('images', [])
thumbnails = [] thumbnails = []
for thumbnail in thumbnail_data: for thumbnail in thumbnail_data:
for instance in thumbnail['imageInstances']: for instance in thumbnail['imageInstances']:
image_data = instance.get('imageInfo', {}) image_data = instance.get('imageInfo', {})
thumbnails.append({ thumbnails.append({
'id': str_or_none(image_data.get('id')), 'id': str_or_none(image_data.get('id')),
'url': image_data.get('fullPath'), 'url': image_data.get('fullPath'),
'width': image_data.get('width'), 'width': image_data.get('width'),
'height': image_data.get('height'), 'height': image_data.get('height'),
}) })
season_data = raw_data.get('season', {}) season_data = raw_data.get('season', {})
season = str_or_none(season_data.get('name')) season = str_or_none(season_data.get('name'))
season_number = int_or_none(season_data.get('seasonNumber')) season_number = int_or_none(season_data.get('seasonNumber'))
season_id = str_or_none(season_data.get('id')) season_id = str_or_none(season_data.get('id'))
formats = [] for video_data in raw_data['videoList']:
for video_data in raw_data['videoList']: current_video_list = {}
current_video_list = {} current_video_list['language'] = video_data.get('language', {}).get('languageCode')
current_video_list['language'] = video_data.get('language', {}).get('languageCode')
is_hardsubbed = video_data.get('hardSubbed') is_hardsubbed = video_data.get('hardSubbed')
for video_instance in video_data['videoInstances']: for video_instance in video_data['videoInstances']:
httpurl = video_instance.get('httpUrl') httpurl = video_instance.get('httpUrl')
url = httpurl if httpurl else video_instance.get('rtmpUrl') url = httpurl if httpurl else video_instance.get('rtmpUrl')
if url is None: if url is None:
# this video format is unavailable to the user (not premium etc.) # this video format is unavailable to the user (not premium etc.)
continue continue
current_format = current_video_list.copy() current_format = current_video_list.copy()
format_id_parts = [] format_id_parts = []
format_id_parts.append(str_or_none(video_instance.get('id'))) format_id_parts.append(str_or_none(video_instance.get('id')))
if is_hardsubbed is not None: if is_hardsubbed is not None:
if is_hardsubbed: if is_hardsubbed:
format_id_parts.append('yeshardsubbed') format_id_parts.append('yeshardsubbed')
else:
format_id_parts.append('nothardsubbed')
format_id_parts.append(current_format['language'])
format_id = '_'.join([x for x in format_id_parts if x is not None])
ext = determine_ext(url)
if ext == 'm3u8':
for format_ in self._extract_m3u8_formats(
url, video_id, m3u8_id=format_id, fatal=False):
formats[format_['format_id']] = format_
continue
elif ext == 'mpd':
for format_ in self._extract_mpd_formats(
url, video_id, mpd_id=format_id, fatal=False):
formats[format_['format_id']] = format_
continue
current_format['url'] = url
quality_data = video_instance.get('videoQuality')
if quality_data:
quality = quality_data.get('name') or quality_data.get('description')
else: else:
format_id_parts.append('nothardsubbed') quality = None
format_id_parts.append(current_format['language']) height = None
if quality:
height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
format_id = '_'.join([x for x in format_id_parts if x is not None]) if height is None:
self.report_warning('Could not get height of video')
else:
current_format['height'] = height
current_format['format_id'] = format_id
ext = determine_ext(url) formats[current_format['format_id']] = current_format
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
url, video_id, m3u8_id=format_id, fatal=False)
formats.extend(m3u8_formats)
continue
elif ext == 'mpd':
mpd_formats = self._extract_mpd_formats(
url, video_id, mpd_id=format_id, fatal=False)
formats.extend(mpd_formats)
continue
current_format['url'] = url
quality_data = video_instance.get('videoQuality')
if quality_data:
quality = quality_data.get('name') or quality_data.get('description')
else:
quality = None
height = None
if quality:
height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
if height is None:
self.report_warning('Could not get height of video')
else:
current_format['height'] = height
current_format['format_id'] = format_id
formats.append(current_format)
formats = list(formats.values())
self._sort_formats(formats) self._sort_formats(formats)
return { return {