[puhutv] improvement

This commit is contained in:
Enes 2018-06-05 20:19:05 +03:00
parent f5f1ca8880
commit b521306bb5

View File

@ -10,14 +10,18 @@ from ..utils import (
parse_iso8601,
str_or_none,
unified_strdate,
urljoin
urljoin,
url_basename,
remove_end
)
class PuhuTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[a-z0-9-]+)-izle'
IE_NAME = 'puhutv'
_TESTS = [
{ # A Film
{
# A Film
'url': 'https://puhutv.com/sut-kardesler-izle',
'md5': 'a347470371d56e1585d1b2c8dab01c96',
'info_dict': {
@ -33,44 +37,18 @@ class PuhuTVIE(InfoExtractor):
'timestamp': int,
},
},
{ # An Episode and geo restricted
{
# An Episode and geo restricted
'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
'md5': '3cd1f4b931cff5e009dfa46a3b88a42a',
'info_dict': {
'id': 'jet-sosyete-1-bolum',
'display_id': '18501',
'ext': 'mp4',
'title': 'Jet Sosyete 1. Sezon 1. Bölüm',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BKM',
'description': 'md5:18ba5abe6d19f8063a8348445c41e28f',
'uploader_id': '269',
'upload_date': '20180220',
'timestamp': int,
},
'only_matching': True,
},
{ # Has subtitle
{
# Has subtitle
'url': 'https://puhutv.com/dip-1-bolum-izle',
'md5': 'f27792b1169f42ab318c38887ad5b28e',
'info_dict': {
'id': 'dip-1-bolum',
'display_id': '18944',
'ext': 'mp4',
'title': 'Dip 1. Sezon 1. Bölüm',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'TMC',
'description': 'md5:e8ddb56738b093b4eae0a536e2ea02c2',
'uploader_id': '25',
'upload_date': '20180330',
'timestamp': int,
},
'params': {
'skip_download': True,
}
'only_matching': True,
}
]
IE_NAME = 'puhutv'
_SUBTITLE_LANGS = { # currently supported for some series
_SUBTITLE_LANGS = {
'English': 'en',
'Deutsch': 'de',
'عربى': 'ar'
@ -78,17 +56,15 @@ class PuhuTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
# API call
info = self._download_json(
'https://puhutv.com/api/slug/%s-izle' % video_id, video_id)
'https://puhutv.com/api/slug/%s-izle' % video_id, video_id)['data']
info = info.get('data')
display_id = compat_str(info.get('id'))
title = info.get('title').get('name')
display_id = compat_str(info['id'])
title = info['title']['name']
if(info.get('display_name') and title is not None):
title += ' ' + info.get('display_name')
description = info.get('title', {}).get('description')
description = info.get('title', {}).get('description') or info.get('description')
timestamp = parse_iso8601(info.get('created_at'))
upload_date = unified_strdate(info.get('created_at'))
uploader = info.get('title', {}).get('producer', {}).get('name')
@ -98,9 +74,6 @@ class PuhuTVIE(InfoExtractor):
thumbnail = urljoin('https://', info.get('content', {}).get('images', {}).get('wide', {}).get('main'))
release_year = int_or_none(info.get('title', {}).get('released_at'))
webpage_url = info.get('web_url')
tags_list = info.get('title', {}).get('genres', {})
thumbnails_list = info.get('content', {}).get('images', {}).get('wide', {})
subtitles_list = info.get('content', {}).get('subtitles', {})
# for series
season_number = int_or_none(info.get('season_number'))
@ -108,24 +81,29 @@ class PuhuTVIE(InfoExtractor):
episode_number = int_or_none(info.get('episode_number'))
tags = []
for tag in tags_list:
if tag.get('name'):
for tag in info.get('title', {}).get('genres', {}):
if isinstance(tag.get('name'), compat_str):
tags.append(tag.get('name'))
thumbnails = []
for id, url in thumbnails_list.items():
url = urljoin('https://', url)
thumbnails.append({
'url': url,
'id': id
})
thumbs_dict = info.get('content', {}).get('images', {}).get('wide', {})
if isinstance(thumbs_dict, dict):
for id, url in thumbs_dict.items():
if not url or not isinstance(url, compat_str):
continue
url = urljoin('https://', url)
thumbnails.append({
'url': url,
'id': id
})
subtitles = {}
for subtitle in subtitles_list:
for subtitle in info.get('content', {}).get('subtitles', {}):
if not isinstance(subtitle, dict):
continue
lang = subtitle.get('language')
sub_url = subtitle.get('url')
# Skip this subtitle if the API no longer provides these keys
if not lang or not sub_url:
if not lang or not isinstance(lang, compat_str) or not sub_url or not isinstance(sub_url, compat_str):
continue
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
'url': sub_url,
@ -138,20 +116,25 @@ class PuhuTVIE(InfoExtractor):
video_id, 'Downloading video JSON', fatal=False)
if not req_formats:
self.raise_geo_restricted()
else:
format_dict = req_formats.get('data').get('videos')
formats = []
for format in format_dict:
for format in req_formats['data']['videos']:
media_url = format.get('url')
if not media_url or not isinstance(media_url, compat_str):
continue
ext = format.get('video_format') or determine_ext(media_url)
quality = format.get('quality')
if ext == 'mp4' and format.get('is_playlist', False) is False:
if format.get('stream_type') == 'hls' and format.get('is_playlist') is True:
m3u8_id = remove_end(url_basename(media_url), '.m3u8')
formats.append(self._m3u8_meta_format(media_url, ext, m3u8_id=m3u8_id))
elif ext == 'mp4' and format.get('is_playlist', False) is False:
formats.append({
'url': media_url,
'format_id': 'http-%s' % quality,
'ext': ext
'ext': ext,
'height': quality
})
self._sort_formats(formats)
return {
'id': video_id,
@ -191,7 +174,8 @@ class PuhuTVSerieIE(InfoExtractor):
},
'playlist_mincount': 234,
},
{ # a film detail page which is using same url with serie page
{
# a film detail page, which uses the same URL pattern as a series page
'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
'info_dict': {
'title': 'Kaybedenler Kulübü',
@ -205,7 +189,7 @@ class PuhuTVSerieIE(InfoExtractor):
def _extract_entries(self, playlist_id, seasons):
for season in seasons:
season_id = season.get('id')
season_id = season['id']
season_number = season.get('position')
pagenum = 1
has_more = True
@ -218,7 +202,7 @@ class PuhuTVSerieIE(InfoExtractor):
'https://galadriel.puhutv.com/seasons/%s' % season_id,
playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query)
for episode in season_info.get('episodes'):
video_id = episode.get('slugPath').replace('-izle', '')
video_id = episode['slugPath'].replace('-izle', '')
yield self.url_result(
'https://puhutv.com/%s-izle' % video_id,
PuhuTVIE.ie_key(), video_id)
@ -229,8 +213,7 @@ class PuhuTVSerieIE(InfoExtractor):
playlist_id = self._match_id(url)
info = self._download_json(
'https://puhutv.com/api/slug/%s-detay' % playlist_id,
playlist_id).get('data')
'https://puhutv.com/api/slug/%s-detay' % playlist_id, playlist_id)['data']
title = info.get('name')
uploader = info.get('producer', {}).get('name')
@ -240,7 +223,7 @@ class PuhuTVSerieIE(InfoExtractor):
entries = self._extract_entries(playlist_id, seasons)
else:
# Films use the same URL pattern as series
video_id = info.get('assets')[0].get('slug')
video_id = info['assets'][0]['slug']
return self.url_result(
'https://puhutv.com/%s-izle' % video_id,
PuhuTVIE.ie_key(), video_id)