239 lines
8.7 KiB
Python
Raw Normal View History

2018-04-24 19:02:38 +03:00
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
2018-04-30 19:02:20 +03:00
from ..compat import compat_str
2018-04-24 21:27:51 +03:00
from ..utils import (
int_or_none,
2018-04-27 18:11:36 +03:00
float_or_none,
2018-05-17 16:28:20 +03:00
determine_ext,
parse_iso8601,
str_or_none,
unified_strdate,
2018-06-05 20:19:05 +03:00
urljoin,
url_basename,
remove_end
2018-04-24 21:27:51 +03:00
)
2018-04-24 19:02:38 +03:00
class PuhuTVIE(InfoExtractor):
    """Extractor for single puhutv.com watch pages (``<slug>-izle`` URLs).

    Metadata is read from the ``/api/slug/<slug>-izle`` JSON endpoint and the
    media URLs from ``/api/assets/<asset id>/videos``.
    """

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[a-z0-9-]+)-izle'
    IE_NAME = 'puhutv'
    _TESTS = [
        {
            # A Film
            'url': 'https://puhutv.com/sut-kardesler-izle',
            'md5': 'a347470371d56e1585d1b2c8dab01c96',
            'info_dict': {
                'id': 'sut-kardesler',
                'display_id': '5085',
                'ext': 'mp4',
                'title': 'Süt Kardeşler',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Arzu Film',
                'description': 'md5:405fd024df916ca16731114eb18e511a',
                'uploader_id': '43',
                'upload_date': '20160729',
                'timestamp': int,
            },
        },
        {
            # An Episode and geo restricted
            'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
            'only_matching': True,
        },
        {
            # Has subtitle
            'url': 'https://puhutv.com/dip-1-bolum-izle',
            'only_matching': True,
        }
    ]

    # Maps the language labels used by the API to language codes; labels
    # that are not listed here are used as subtitle keys unchanged.
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
        'عربى': 'ar'
    }

    @staticmethod
    def _dict_at(mapping, *keys):
        """Return the dict nested under ``keys`` in ``mapping``, or ``{}``.

        ``dict.get(key, {})`` only falls back to the default when the key is
        missing, so a JSON ``null`` (or any non-dict value) would break a
        chained ``.get()``.  This helper walks the path and returns an empty
        dict as soon as something on the way is not a dict.
        """
        for key in keys:
            if not isinstance(mapping, dict):
                return {}
            mapping = mapping.get(key)
        return mapping if isinstance(mapping, dict) else {}

    @staticmethod
    def _absolute_url(value):
        """Return ``value`` as an absolute https URL, or None if unusable.

        ``urljoin('https://', 'host/path')`` produces the malformed
        ``https:///host/path``, so scheme-less values are prefixed explicitly
        instead.
        """
        if not value or not isinstance(value, compat_str):
            return None
        if value.startswith(('http://', 'https://')):
            return value
        if value.startswith('//'):
            return 'https:' + value
        return 'https://' + value

    def _collect_thumbnails(self, images):
        """Build the thumbnails list from an ``{image id: image url}`` dict."""
        thumbnails = []
        for image_id, image_url in images.items():
            image_url = self._absolute_url(image_url)
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'id': image_id
            })
        return thumbnails

    def _collect_subtitles(self, subtitle_list):
        """Build the subtitles dict from the API's list of subtitle entries.

        Entries that are not dicts or that lack a string ``language`` / ``url``
        are skipped.
        """
        subtitles = {}
        for subtitle in subtitle_list or []:
            if not isinstance(subtitle, dict):
                continue
            lang = subtitle.get('language')
            sub_url = subtitle.get('url')
            if not lang or not isinstance(lang, compat_str):
                continue
            if not sub_url or not isinstance(sub_url, compat_str):
                continue
            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                'url': sub_url,
                'ext': determine_ext(sub_url)
            }]
        return subtitles

    def _collect_formats(self, video_id, display_id):
        """Download the asset's video list and turn it into sorted formats.

        Raises the geo restriction error when the video list cannot be
        downloaded at all.
        """
        # Some videos are geo restricted at the copyright owner's request, in
        # which case this endpoint returns 403.  The download is non-fatal so
        # that the failure can be reported as a geo restriction.
        videos_json = self._download_json(
            'https://puhutv.com/api/assets/%s/videos' % display_id,
            video_id, 'Downloading video JSON', fatal=False)
        if not videos_json:
            self.raise_geo_restricted()

        formats = []
        for video in self._dict_at(videos_json, 'data').get('videos') or []:
            if not isinstance(video, dict):
                continue
            media_url = video.get('url')
            if not media_url or not isinstance(media_url, compat_str):
                continue
            ext = video.get('video_format') or determine_ext(media_url)
            quality = video.get('quality')
            if video.get('stream_type') == 'hls' and video.get('is_playlist') is True:
                m3u8_id = remove_end(url_basename(media_url), '.m3u8')
                formats.append(self._m3u8_meta_format(media_url, ext, m3u8_id=m3u8_id))
            elif ext == 'mp4' and video.get('is_playlist', False) is False:
                formats.append({
                    'url': media_url,
                    'format_id': 'http-%s' % quality,
                    'ext': ext,
                    # 'height' must be numeric for format sorting
                    'height': int_or_none(quality)
                })
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Return the info dict for the video behind ``url``."""
        video_id = self._match_id(url)
        info = self._download_json(
            'https://puhutv.com/api/slug/%s-izle' % video_id, video_id)['data']

        # The numeric asset id and the title are mandatory; everything else
        # is optional and looked up defensively.
        display_id = compat_str(info['id'])
        title = info['title']['name']
        display_name = info.get('display_name')
        if display_name and title is not None:
            title += ' ' + display_name

        title_info = self._dict_at(info, 'title')
        producer = self._dict_at(title_info, 'producer')
        content = self._dict_at(info, 'content')
        wide_images = self._dict_at(content, 'images', 'wide')
        created_at = info.get('created_at')

        tags = []
        for genre in title_info.get('genres') or []:
            genre_name = genre.get('name') if isinstance(genre, dict) else None
            if isinstance(genre_name, compat_str):
                tags.append(genre_name)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': title_info.get('description') or info.get('description'),
            # for series
            'season_id': int_or_none(info.get('season_id')),
            'season_number': int_or_none(info.get('season_number')),
            'episode_number': int_or_none(info.get('episode_number')),
            'release_year': int_or_none(title_info.get('released_at')),
            'upload_date': unified_strdate(created_at),
            'timestamp': parse_iso8601(created_at),
            'uploader': producer.get('name'),
            'uploader_id': str_or_none(producer.get('id')),
            'view_count': int_or_none(content.get('watch_count')),
            'duration': float_or_none(content.get('duration_in_ms'), scale=1000),
            'tags': tags,
            'subtitles': self._collect_subtitles(content.get('subtitles')),
            'webpage_url': info.get('web_url'),
            'thumbnail': self._absolute_url(wide_images.get('main')),
            'thumbnails': self._collect_thumbnails(wide_images),
            'formats': self._collect_formats(video_id, display_id)
        }
2018-04-30 19:02:20 +03:00
2018-05-01 15:23:31 +03:00
class PuhuTVSerieIE(InfoExtractor):
    """Extractor for puhutv.com detail pages (``<slug>-detay`` URLs).

    A detail page with seasons becomes a playlist of its episodes.  A film's
    detail page uses the same URL scheme and is delegated to PuhuTVIE.
    """

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[a-z0-9-]+)-detay'
    IE_NAME = 'puhutv:serie'
    _TESTS = [
        {
            'url': 'https://puhutv.com/deniz-yildizi-detay',
            'info_dict': {
                'title': 'Deniz Yıldızı',
                'id': 'deniz-yildizi',
                'uploader': 'Focus Film',
                'uploader_id': 61,
            },
            'playlist_mincount': 234,
        },
        {
            # a film detail page which is using same url with serie page
            'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
            'info_dict': {
                'title': 'Kaybedenler Kulübü',
                'id': 'kaybedenler-kulubu',
                'uploader': 'Tolga Örnek, Murat Dörtbudak, Neslihan Dörtbudak, Kemal Kaplanoğlu',
                'uploader_id': 248,
            },
            'playlist_mincount': 1,
        },
    ]

    def _extract_entries(self, playlist_id, seasons):
        """Yield a url_result for every episode of every season.

        Each season is fetched page by page (40 episodes per request) for as
        long as the response carries ``hasMore`` set to true.
        """
        for season in seasons:
            season_id = season['id']
            season_number = season.get('position')
            pagenum = 1
            has_more = True
            while has_more is True:
                season_info = self._download_json(
                    'https://galadriel.puhutv.com/seasons/%s' % season_id,
                    playlist_id,
                    'Downloading season %s page %s' % (season_number, pagenum),
                    query={
                        'page': pagenum,
                        'per': 40,
                    })
                # 'episodes' may be absent or null; treat both as an empty page
                for episode in season_info.get('episodes') or []:
                    slug_path = episode.get('slugPath') if isinstance(episode, dict) else None
                    if not slug_path or not isinstance(slug_path, compat_str):
                        # one malformed entry should not abort the whole playlist
                        continue
                    # Strip only the trailing '-izle'; replacing every
                    # occurrence would corrupt slugs containing it elsewhere.
                    video_id = remove_end(slug_path, '-izle')
                    yield self.url_result(
                        'https://puhutv.com/%s-izle' % video_id,
                        PuhuTVIE.ie_key(), video_id)
                pagenum += 1
                has_more = season_info.get('hasMore', False)

    def _real_extract(self, url):
        """Return a playlist for a series page, or a url_result for a film."""
        playlist_id = self._match_id(url)
        info = self._download_json(
            'https://puhutv.com/api/slug/%s-detay' % playlist_id, playlist_id)['data']

        seasons = info.get('seasons')
        if not seasons:
            # For films, these are using same url with series
            video_id = info['assets'][0]['slug']
            return self.url_result(
                'https://puhutv.com/%s-izle' % video_id,
                PuhuTVIE.ie_key(), video_id)

        # `.get('producer', {})` would still return None for a JSON null,
        # so fall back to an empty dict explicitly.
        producer = info.get('producer')
        if not isinstance(producer, dict):
            producer = {}

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': info.get('name'),
            'uploader': producer.get('name'),
            'uploader_id': producer.get('id'),
            'entries': self._extract_entries(playlist_id, seasons),
        }