diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 3b8a2419c..72900215e 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -10,14 +10,18 @@ from ..utils import ( parse_iso8601, str_or_none, unified_strdate, - urljoin + urljoin, + url_basename, + remove_end ) class PuhuTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-izle' + IE_NAME = 'puhutv' _TESTS = [ - { # A Film + { + # A Film 'url': 'https://puhutv.com/sut-kardesler-izle', 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { @@ -33,44 +37,18 @@ class PuhuTVIE(InfoExtractor): 'timestamp': int, }, }, - { # An Episode and geo restricted + { + # An Episode and geo restricted 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', - 'md5': '3cd1f4b931cff5e009dfa46a3b88a42a', - 'info_dict': { - 'id': 'jet-sosyete-1-bolum', - 'display_id': '18501', - 'ext': 'mp4', - 'title': 'Jet Sosyete 1. Sezon 1. Bölüm', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'BKM', - 'description': 'md5:18ba5abe6d19f8063a8348445c41e28f', - 'uploader_id': '269', - 'upload_date': '20180220', - 'timestamp': int, - }, + 'only_matching': True, }, - { # Has subtitle + { + # Has subtitle 'url': 'https://puhutv.com/dip-1-bolum-izle', - 'md5': 'f27792b1169f42ab318c38887ad5b28e', - 'info_dict': { - 'id': 'dip-1-bolum', - 'display_id': '18944', - 'ext': 'mp4', - 'title': 'Dip 1. Sezon 1. Bölüm', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'TMC', - 'description': 'md5:e8ddb56738b093b4eae0a536e2ea02c2', - 'uploader_id': '25', - 'upload_date': '20180330', - 'timestamp': int, - }, - 'params': { - 'skip_download': True, - } + 'only_matching': True, } ] - IE_NAME = 'puhutv' - _SUBTITLE_LANGS = { # currently supported for some series + _SUBTITLE_LANGS = { 'English': 'en', 'Deutsch': 'de', 'عربى': 'ar' @@ -78,17 +56,15 @@ class PuhuTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # API call info = self._download_json( - 'https://puhutv.com/api/slug/%s-izle' % video_id, video_id) + 'https://puhutv.com/api/slug/%s-izle' % video_id, video_id)['data'] - info = info.get('data') - display_id = compat_str(info.get('id')) - title = info.get('title').get('name') + display_id = compat_str(info['id']) + title = info['title']['name'] if(info.get('display_name') and title is not None): title += ' ' + info.get('display_name') - description = info.get('title', {}).get('description') + description = info.get('title', {}).get('description') or info.get('description') timestamp = parse_iso8601(info.get('created_at')) upload_date = unified_strdate(info.get('created_at')) uploader = info.get('title', {}).get('producer', {}).get('name') @@ -98,9 +74,6 @@ class PuhuTVIE(InfoExtractor): thumbnail = urljoin('https://', info.get('content', {}).get('images', {}).get('wide', {}).get('main')) release_year = int_or_none(info.get('title', {}).get('released_at')) webpage_url = info.get('web_url') - tags_list = info.get('title', {}).get('genres', {}) - thumbnails_list = info.get('content', {}).get('images', {}).get('wide', {}) - subtitles_list = info.get('content', {}).get('subtitles', {}) # for series season_number = int_or_none(info.get('season_number')) @@ -108,24 +81,29 @@ class PuhuTVIE(InfoExtractor): episode_number = int_or_none(info.get('episode_number')) tags = [] - for tag in tags_list: - if tag.get('name'): + for tag in info.get('title', {}).get('genres', {}): + if isinstance(tag.get('name'), compat_str): tags.append(tag.get('name')) thumbnails = [] - for id, url in thumbnails_list.items(): - url = urljoin('https://', url) - thumbnails.append({ - 'url': url, - 'id': id - }) + thumbs_dict = info.get('content', {}).get('images', {}).get('wide', {}) + if isinstance(thumbs_dict, dict): + for id, url in thumbs_dict.items(): + if not url or not isinstance(url, compat_str): + continue + url = urljoin('https://', url) + thumbnails.append({ + 'url': url, + 'id': id + }) subtitles = {} - for subtitle in subtitles_list: + for subtitle in info.get('content', {}).get('subtitles', {}): + if not isinstance(subtitle, dict): + continue lang = subtitle.get('language') sub_url = subtitle.get('url') - # If the keys were changed by api, continue - if not lang or not sub_url: + if not lang or not isinstance(lang, compat_str) or not sub_url or not isinstance(sub_url, compat_str): continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ 'url': sub_url, @@ -138,20 +116,25 @@ class PuhuTVIE(InfoExtractor): video_id, 'Downloading video JSON', fatal=False) if not req_formats: self.raise_geo_restricted() - else: - format_dict = req_formats.get('data').get('videos') formats = [] - for format in format_dict: + for format in req_formats['data']['videos']: media_url = format.get('url') + if not media_url or not isinstance(media_url, compat_str): + continue ext = format.get('video_format') or determine_ext(media_url) quality = format.get('quality') - if ext == 'mp4' and format.get('is_playlist', False) is False: + if format.get('stream_type') == 'hls' and format.get('is_playlist') is True: + m3u8_id = remove_end(url_basename(media_url), '.m3u8') + formats.append(self._m3u8_meta_format(media_url, ext, m3u8_id=m3u8_id)) + elif ext == 'mp4' and format.get('is_playlist', False) is False: formats.append({ 'url': media_url, 'format_id': 'http-%s' % quality, - 'ext': ext + 'ext': ext, + 'height': quality }) + self._sort_formats(formats) return { 'id': video_id, @@ -191,7 +174,8 @@ class PuhuTVSerieIE(InfoExtractor): }, 'playlist_mincount': 234, }, - { # a film detail page which is using same url with serie page + { + # a film detail page which is using same url with serie page 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', 'info_dict': { 'title': 'Kaybedenler Kulübü', @@ -205,7 +189,7 @@ class PuhuTVSerieIE(InfoExtractor): def _extract_entries(self, playlist_id, seasons): for season in seasons: - season_id = season.get('id') + season_id = season['id'] season_number = season.get('position') pagenum = 1 has_more = True @@ -218,7 +202,7 @@ class PuhuTVSerieIE(InfoExtractor): 'https://galadriel.puhutv.com/seasons/%s' % season_id, playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query) for episode in season_info.get('episodes'): - video_id = episode.get('slugPath').replace('-izle', '') + video_id = episode['slugPath'].replace('-izle', '') yield self.url_result( 'https://puhutv.com/%s-izle' % video_id, PuhuTVIE.ie_key(), video_id) @@ -229,8 +213,7 @@ class PuhuTVSerieIE(InfoExtractor): playlist_id = self._match_id(url) info = self._download_json( - 'https://puhutv.com/api/slug/%s-detay' % playlist_id, - playlist_id).get('data') + 'https://puhutv.com/api/slug/%s-detay' % playlist_id, playlist_id)['data'] title = info.get('name') uploader = info.get('producer', {}).get('name') @@ -240,7 +223,7 @@ class PuhuTVSerieIE(InfoExtractor): entries = self._extract_entries(playlist_id, seasons) else: # For films, these are using same url with series - video_id = info.get('assets')[0].get('slug') + video_id = info['assets'][0]['slug'] return self.url_result( 'https://puhutv.com/%s-izle' % video_id, PuhuTVIE.ie_key(), video_id)