From d43a0d4826a5937c201247a5ecd0230c07fc3e22 Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 24 Apr 2018 19:02:38 +0300 Subject: [PATCH 01/18] [puhutv] Add new extractor --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/puhutv.py | 88 ++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/puhutv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6fb65e4fe..10e2152db 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -849,6 +849,7 @@ from .pornhub import ( from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE +from .puhutv import PuhuTVIE from .presstv import PressTVIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE @@ -1332,7 +1333,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py new file mode 100644 index 000000000..c2184587d --- /dev/null +++ b/youtube_dl/extractor/puhutv.py @@ -0,0 +1,88 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str +) + + +class PuhuTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-izle' + _TESTS = [ + { + 'url': 'https://puhutv.com/sut-kardesler-izle', + 'md5': '51f11ccdeef908753b4e3a99d19be939', + 'info_dict': { + 'id': '5085', + 'slug_id': 'sut-kardesler', + 'ext': 'mp4', + 'title': 'Süt Kardeşler', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Arzu Film', + 'description': 'md5:405fd024df916ca16731114eb18e511a', + }, + }, + { + 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', + 'only_matching': True, + } + ] + IE_NAME = 'puhutv' + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_json( + 'https://puhutv.com/api/slug/%s-izle' % video_id, + video_id).get('data') + + id = compat_str(info.get('id')) + title = info.get('title').get('name') + if(info.get('display_name')): + title += ' ' + info.get('display_name') + + thumbnails = [] + for key,image in info.get('content').get('images').get('wide').items(): + thumbnails.append({ + 'url': image, + 'id': key + }) + + format_dict = self._download_json( + 'https://puhutv.com/api/assets/%s/videos' % id, + id, 'Downloading sources').get('data').get('videos') + if not format_dict: + raise ExtractorError('This video not available in your country') + + formats = [] + for format in format_dict: + media_url = format.get('url') + ext = format.get('video_format') + quality = format.get('quality') + if ext == 'hls': + format_id = 'hls-%s' % quality + formats.extend(self._extract_m3u8_formats( + media_url, id, 'm3u8', preference=-1, + m3u8_id=format_id, fatal=False)) + else: + if format.get('is_playlist') == False: + formats.append({ + 'url': media_url, + 'format_id': 'http-%s' % quality, + 'ext': ext + }) + self._sort_formats(formats) + + return { + 'id': id, + 'slug_id': video_id, + 'title': title, + 'description': info.get('title').get('description'), + 'uploader': info.get('title').get('producer').get('name'), + 'view_count': info.get('content').get('watch_count'), + 'follower_count': info.get('title').get('follower_count'), + 'thumbnail': 'https://%s' % info.get('content').get('images').get('wide').get('main'), + 'thumbnails': thumbnails, + 'loop_thumbnails': info.get('content').get('loop_thumbnails'), + 'formats': formats + } From c462fa0c5ff107ee13a8eb387bed50dc87466e1e Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 24 Apr 2018 21:27:51 +0300 Subject: [PATCH 02/18] [puhutv] fixing meta fields --- youtube_dl/extractor/puhutv.py | 66 ++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index c2184587d..857740053 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -5,6 +5,11 @@ from .common import InfoExtractor from ..compat import ( compat_str ) +from ..utils import ( + int_or_none, + ExtractorError, + float_or_none +) class PuhuTVIE(InfoExtractor): @@ -15,17 +20,30 @@ class PuhuTVIE(InfoExtractor): 'md5': '51f11ccdeef908753b4e3a99d19be939', 'info_dict': { 'id': '5085', - 'slug_id': 'sut-kardesler', + 'display_id': 'sut-kardesler', 'ext': 'mp4', 'title': 'Süt Kardeşler', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Arzu Film', 'description': 'md5:405fd024df916ca16731114eb18e511a', + 'uploader_id': '43', + 'upload_date': '20160729', }, }, { 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', - 'only_matching': True, + 'md5': '51f11ccdeef908753b4e3a99d19be939', + 'info_dict': { + 'id': '18501', + 'display_id': 'jet-sosyete', + 'ext': 'mp4', + 'title': 'Süt Kardeşler', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Arzu Film', + 'description': 'md5:405fd024df916ca16731114eb18e511a', + 'uploader_id': '43', + 'upload_date': '20160729', + }, } ] IE_NAME = 'puhutv' @@ -40,6 +58,25 @@ class PuhuTVIE(InfoExtractor): title = info.get('title').get('name') if(info.get('display_name')): title += ' ' + info.get('display_name') + description = info.get('title').get('description') + upload_date = info.get('created_at').split('T')[0].replace('-', '') + uploader = info.get('title').get('producer').get('name') + uploader_id = compat_str(info.get('title').get('producer').get('id')) + view_count = int_or_none(info.get('content').get('watch_count')) + duration = float_or_none(info.get('content').get('duration_in_ms'), scale=1000) + thumbnail = 'https://%s' % info.get('content').get('images').get('wide').get('main') + release_year = int_or_none(info.get('title').get('released_at')) + webpage_url = info.get('web_url') + + # for series + season_number = int_or_none(info.get('season_number')) + season_id = int_or_none(info.get('season_id')) + episode_number = int_or_none(info.get('episode_number')) + + + tags = [] + for tag in info.get('title').get('genres'): + tags.append(tag.get('name')) thumbnails = [] for key,image in info.get('content').get('images').get('wide').items(): @@ -50,7 +87,7 @@ class PuhuTVIE(InfoExtractor): format_dict = self._download_json( 'https://puhutv.com/api/assets/%s/videos' % id, - id, 'Downloading sources').get('data').get('videos') + video_id, 'Downloading sources').get('data').get('videos') if not format_dict: raise ExtractorError('This video not available in your country') @@ -62,7 +99,7 @@ class PuhuTVIE(InfoExtractor): if ext == 'hls': format_id = 'hls-%s' % quality formats.extend(self._extract_m3u8_formats( - media_url, id, 'm3u8', preference=-1, + media_url, video_id, 'm3u8', preference=-1, m3u8_id=format_id, fatal=False)) else: if format.get('is_playlist') == False: @@ -75,14 +112,21 @@ class PuhuTVIE(InfoExtractor): return { 'id': id, - 'slug_id': video_id, + 'display_id': video_id, 'title': title, - 'description': info.get('title').get('description'), - 'uploader': info.get('title').get('producer').get('name'), - 'view_count': info.get('content').get('watch_count'), - 'follower_count': info.get('title').get('follower_count'), - 'thumbnail': 'https://%s' % info.get('content').get('images').get('wide').get('main'), + 'description': description, + 'season_id': season_id, + 'season_number': season_number, + 'episode_number': episode_number, + 'release_year': release_year, + 'upload_date': upload_date, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'duration': duration, + 'tags': tags, + 'webpage_url': webpage_url, + 'thumbnail': thumbnail, 'thumbnails': thumbnails, - 'loop_thumbnails': info.get('content').get('loop_thumbnails'), 'formats': formats } From 096307a00b6b2d4212840ffb29adef806260b7d1 Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 24 Apr 2018 21:38:37 +0300 Subject: [PATCH 03/18] [puhutv] adding test video --- youtube_dl/extractor/puhutv.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 857740053..d143bab97 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -15,7 +15,7 @@ from ..utils import ( class PuhuTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-izle' _TESTS = [ - { + { # A Film 'url': 'https://puhutv.com/sut-kardesler-izle', 'md5': '51f11ccdeef908753b4e3a99d19be939', 'info_dict': { @@ -30,19 +30,19 @@ class PuhuTVIE(InfoExtractor): 'upload_date': '20160729', }, }, - { + { # A Episode 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', - 'md5': '51f11ccdeef908753b4e3a99d19be939', + 'md5': 'e47096511f5ee1fee3586a5714955a25', 'info_dict': { 'id': '18501', - 'display_id': 'jet-sosyete', + 'display_id': 'jet-sosyete-1-bolum', 'ext': 'mp4', - 'title': 'Süt Kardeşler', + 'title': 'Jet Sosyete 1. Sezon 1. Bölüm', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Arzu Film', - 'description': 'md5:405fd024df916ca16731114eb18e511a', - 'uploader_id': '43', - 'upload_date': '20160729', + 'uploader': 'BKM', + 'description': 'md5:0312864b87d6b9b917694a5742fffabd', + 'uploader_id': '269', + 'upload_date': '20180220', }, } ] From e6a74e5325228d8d7533e0977f0a047bb5a3526b Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 24 Apr 2018 21:39:31 +0300 Subject: [PATCH 04/18] [puhutv] adding test video --- youtube_dl/extractor/puhutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index d143bab97..4c51992ce 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -30,7 +30,7 @@ class PuhuTVIE(InfoExtractor): 'upload_date': '20160729', }, }, - { # A Episode + { # An Episode 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', 'md5': 'e47096511f5ee1fee3586a5714955a25', 'info_dict': { From 3b1d7610474754b2a664ace445b0f9b86fcfa79a Mon Sep 17 00:00:00 2001 From: Enes Date: Fri, 27 Apr 2018 18:11:36 +0300 Subject: [PATCH 05/18] [puhutv] added subtitle support --- youtube_dl/extractor/puhutv.py | 36 +++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 4c51992ce..1217976de 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -8,7 +8,8 @@ from ..compat import ( from ..utils import ( int_or_none, ExtractorError, - float_or_none + float_or_none, + determine_ext ) @@ -44,9 +45,32 @@ class PuhuTVIE(InfoExtractor): 'uploader_id': '269', 'upload_date': '20180220', }, + }, + { # Has subtitle + 'url': 'https://puhutv.com/dip-1-bolum-izle', + 'md5': 'ef912104860ad0496b73c57d7f03bf8e', + 'info_dict': { + 'id': '18944', + 'display_id': 'dip-1-bolum', + 'ext': 'mp4', + 'title': 'Dip 1. Sezon 1. Bölüm', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'TMC', + 'description': 'md5:8459001a7decfdc4104ca38a979a41fd', + 'uploader_id': '25', + 'upload_date': '20180330', + }, + 'params': { + 'skip_download': True, + } } ] IE_NAME = 'puhutv' + _SUBTITLE_LANGS = { # currently supported for some series + 'English':'en', + 'Deutsch':'de', + 'عربى':'ar' + } def _real_extract(self, url): video_id = self._match_id(url) @@ -85,6 +109,15 @@ class PuhuTVIE(InfoExtractor): 'id': key }) + subtitles = {} + for subtitle in info.get('content').get('subtitles'): + lang = subtitle.get('language') + sub_url = subtitle.get('url') + subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ + 'url': sub_url, + 'ext': determine_ext(sub_url) + }] + format_dict = self._download_json( 'https://puhutv.com/api/assets/%s/videos' % id, video_id, 'Downloading sources').get('data').get('videos') @@ -125,6 +158,7 @@ class PuhuTVIE(InfoExtractor): 'view_count': view_count, 'duration': duration, 'tags': tags, + 'subtitles': subtitles, 'webpage_url': webpage_url, 'thumbnail': thumbnail, 'thumbnails': thumbnails, From 18c33dbf93e98544c0f0851165c6830dd0507f48 Mon Sep 17 00:00:00 2001 From: Enes Date: Sun, 29 Apr 2018 19:58:10 +0300 Subject: [PATCH 06/18] [puhutv] ignoring HLS formats and fixing sorting --- youtube_dl/extractor/puhutv.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 1217976de..ed06ea212 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -18,7 +18,7 @@ class PuhuTVIE(InfoExtractor): _TESTS = [ { # A Film 'url': 'https://puhutv.com/sut-kardesler-izle', - 'md5': '51f11ccdeef908753b4e3a99d19be939', + 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { 'id': '5085', 'display_id': 'sut-kardesler', @@ -33,7 +33,7 @@ class PuhuTVIE(InfoExtractor): }, { # An Episode 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', - 'md5': 'e47096511f5ee1fee3586a5714955a25', + 'md5': '3cd1f4b931cff5e009dfa46a3b88a42a', 'info_dict': { 'id': '18501', 'display_id': 'jet-sosyete-1-bolum', @@ -48,7 +48,7 @@ class PuhuTVIE(InfoExtractor): }, { # Has subtitle 'url': 'https://puhutv.com/dip-1-bolum-izle', - 'md5': 'ef912104860ad0496b73c57d7f03bf8e', + 'md5': 'f27792b1169f42ab318c38887ad5b28e', 'info_dict': { 'id': '18944', 'display_id': 'dip-1-bolum', @@ -129,19 +129,12 @@ class PuhuTVIE(InfoExtractor): media_url = format.get('url') ext = format.get('video_format') quality = format.get('quality') - if ext == 'hls': - format_id = 'hls-%s' % quality - formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'm3u8', preference=-1, - m3u8_id=format_id, fatal=False)) - else: - if format.get('is_playlist') == False: - formats.append({ - 'url': media_url, - 'format_id': 'http-%s' % quality, - 'ext': ext - }) - self._sort_formats(formats) + if ext == 'mp4' and format.get('is_playlist') == False: + formats.append({ + 'url': media_url, + 'format_id': 'http-%s' % quality, + 'ext': ext + }) return { 'id': id, From 39db8869a11ab42f04b565c0c218acd7f1c713c9 Mon Sep 17 00:00:00 2001 From: Enes Date: Sun, 29 Apr 2018 20:14:53 +0300 Subject: [PATCH 07/18] [puhutv] 'id' and 'display_id' replaced --- youtube_dl/extractor/puhutv.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index ed06ea212..16b9eeba4 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -20,8 +20,8 @@ class PuhuTVIE(InfoExtractor): 'url': 'https://puhutv.com/sut-kardesler-izle', 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { - 'id': '5085', - 'display_id': 'sut-kardesler', + 'id': 'sut-kardesler', + 'display_id': '5085', 'ext': 'mp4', 'title': 'Süt Kardeşler', 'thumbnail': r're:^https?://.*\.jpg$', @@ -35,8 +35,8 @@ class PuhuTVIE(InfoExtractor): 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', 'md5': '3cd1f4b931cff5e009dfa46a3b88a42a', 'info_dict': { - 'id': '18501', - 'display_id': 'jet-sosyete-1-bolum', + 'id': 'jet-sosyete-1-bolum', + 'display_id': '18501', 'ext': 'mp4', 'title': 'Jet Sosyete 1. Sezon 1. Bölüm', 'thumbnail': r're:^https?://.*\.jpg$', @@ -50,8 +50,8 @@ class PuhuTVIE(InfoExtractor): 'url': 'https://puhutv.com/dip-1-bolum-izle', 'md5': 'f27792b1169f42ab318c38887ad5b28e', 'info_dict': { - 'id': '18944', - 'display_id': 'dip-1-bolum', + 'id': 'dip-1-bolum', + 'display_id': '18944', 'ext': 'mp4', 'title': 'Dip 1. Sezon 1. Bölüm', 'thumbnail': r're:^https?://.*\.jpg$', @@ -78,7 +78,7 @@ class PuhuTVIE(InfoExtractor): 'https://puhutv.com/api/slug/%s-izle' % video_id, video_id).get('data') - id = compat_str(info.get('id')) + display_id = compat_str(info.get('id')) title = info.get('title').get('name') if(info.get('display_name')): title += ' ' + info.get('display_name') @@ -119,7 +119,7 @@ class PuhuTVIE(InfoExtractor): }] format_dict = self._download_json( - 'https://puhutv.com/api/assets/%s/videos' % id, + 'https://puhutv.com/api/assets/%s/videos' % display_id, video_id, 'Downloading sources').get('data').get('videos') if not format_dict: raise ExtractorError('This video not available in your country') @@ -137,8 +137,8 @@ class PuhuTVIE(InfoExtractor): }) return { - 'id': id, - 'display_id': video_id, + 'id': video_id, + 'display_id': display_id, 'title': title, 'description': description, 'season_id': season_id, From 85d699c6d22ff88d94c70d9543e91a7b5dcfdfdb Mon Sep 17 00:00:00 2001 From: Enes Date: Mon, 30 Apr 2018 19:02:20 +0300 Subject: [PATCH 08/18] [puhutv] Support for playlists --- youtube_dl/extractor/extractors.py | 7 +++- youtube_dl/extractor/puhutv.py | 61 ++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 10e2152db..4240c8f97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -849,7 +849,10 @@ from .pornhub import ( from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE -from .puhutv import PuhuTVIE +from .puhutv import ( + PuhuTVIE, + PuhuTVSeasonIE, +) from .presstv import PressTVIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE @@ -1333,7 +1336,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 16b9eeba4..b4f4dcf55 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -2,9 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_str -) +from ..compat import compat_str from ..utils import ( int_or_none, ExtractorError, @@ -97,7 +95,6 @@ class PuhuTVIE(InfoExtractor): season_id = int_or_none(info.get('season_id')) episode_number = int_or_none(info.get('episode_number')) - tags = [] for tag in info.get('title').get('genres'): tags.append(tag.get('name')) @@ -157,3 +154,59 @@ class PuhuTVIE(InfoExtractor): 'thumbnails': thumbnails, 'formats': formats } + + +class PuhuTVSeasonIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-detay' + IE_NAME = 'puhutv:season' + _TESTS = [{ + 'url': 'https://puhutv.com/deniz-yildizi-detay', + 'info_dict': { + 'title': 'Deniz Yıldızı', + 'id': 'deniz-yildizi', + }, + 'playlist_mincount': 10, + }] + + def _extract_entries(self, playlist_id, seasons): + for season in seasons: + season_id = season.get('id') + season_number = season.get('position') + pagenum = 1 + has_more = True + while has_more == True: + query = { + 'page': pagenum, + 'per': 40, + } + season_info = self._download_json( + 'https://galadriel.puhutv.com/seasons/%s' % season_id, + playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query) + for episode in season_info.get('episodes'): + video_id = episode.get('slugPath').replace('-izle', '') + yield self.url_result( + 'https://puhutv.com/%s-izle' % video_id, + PuhuTVIE.ie_key(), video_id) + pagenum = pagenum + 1 + has_more = season_info.get('hasMore') + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + info = self._download_json( + 'https://puhutv.com/api/slug/%s-detay' % playlist_id, + playlist_id).get('data') + + title = info.get('name') + uploader = info.get('producer').get('name') + uploader_id = info.get('producer').get('id') + seasons = info.get('seasons') + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': title, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'entries': self._extract_entries(playlist_id, seasons), + } From 3f82c473945133152934b83ae1d42e20b0f8aa3f Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 1 May 2018 15:23:31 +0300 Subject: [PATCH 09/18] [puhutv] improvement --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/puhutv.py | 98 ++++++++++++++++++++---------- 2 files changed, 66 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4240c8f97..ac311fab4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -851,7 +851,7 @@ from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .puhutv import ( PuhuTVIE, - PuhuTVSeasonIE, + PuhuTVSerieIE, ) from .presstv import PressTVIE from .primesharetv import PrimeShareTVIE diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index b4f4dcf55..d9cdb2a1d 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -29,7 +29,7 @@ class PuhuTVIE(InfoExtractor): 'upload_date': '20160729', }, }, - { # An Episode + { # An Episode and geo restricted 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', 'md5': '3cd1f4b931cff5e009dfa46a3b88a42a', 'info_dict': { @@ -39,7 +39,7 @@ class PuhuTVIE(InfoExtractor): 'title': 'Jet Sosyete 1. Sezon 1. Bölüm', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'BKM', - 'description': 'md5:0312864b87d6b9b917694a5742fffabd', + 'description': 'md5:18ba5abe6d19f8063a8348445c41e28f', 'uploader_id': '269', 'upload_date': '20180220', }, @@ -54,7 +54,7 @@ class PuhuTVIE(InfoExtractor): 'title': 'Dip 1. Sezon 1. Bölüm', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'TMC', - 'description': 'md5:8459001a7decfdc4104ca38a979a41fd', + 'description': 'md5:e8ddb56738b093b4eae0a536e2ea02c2', 'uploader_id': '25', 'upload_date': '20180330', }, @@ -72,23 +72,33 @@ class PuhuTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + # API call info = self._download_json( 'https://puhutv.com/api/slug/%s-izle' % video_id, - video_id).get('data') + video_id, fatal=False) + info = info.get('data') display_id = compat_str(info.get('id')) - title = info.get('title').get('name') - if(info.get('display_name')): + title = info.get('title', {}).get('name') + if(info.get('display_name') and title is not None): title += ' ' + info.get('display_name') - description = info.get('title').get('description') - upload_date = info.get('created_at').split('T')[0].replace('-', '') - uploader = info.get('title').get('producer').get('name') - uploader_id = compat_str(info.get('title').get('producer').get('id')) - view_count = int_or_none(info.get('content').get('watch_count')) - duration = float_or_none(info.get('content').get('duration_in_ms'), scale=1000) - thumbnail = 'https://%s' % info.get('content').get('images').get('wide').get('main') - release_year = int_or_none(info.get('title').get('released_at')) + description = info.get('title', {}).get('description') + + upload_date = info.get('created_at', '').split('T')[0].replace('-', '') + if upload_date is '': + upload_date = None + uploader = info.get('title', {}).get('producer', {}).get('name') + uploader_id = info.get('title', {}).get('producer', {}).get('id') + if uploader_id is not None: + uploader_id = compat_str(uploader_id) + view_count = int_or_none(info.get('content', {}).get('watch_count')) + duration = float_or_none(info.get('content', {}).get('duration_in_ms'), scale=1000) + thumbnail = 'https://%s' % info.get('content', {}).get('images', {}).get('wide', {}).get('main') + release_year = int_or_none(info.get('title', {}).get('released_at')) webpage_url = info.get('web_url') + tags_list = info.get('title', {}).get('genres', {}) + thumbnails_list = info.get('content', {}).get('images', {}).get('wide', {}) + subtitles_list = info.get('content', {}).get('subtitles', {}) # for series season_number = int_or_none(info.get('season_number')) @@ -96,18 +106,18 @@ class PuhuTVIE(InfoExtractor): episode_number = int_or_none(info.get('episode_number')) tags = [] - for tag in info.get('title').get('genres'): + for tag in tags_list: tags.append(tag.get('name')) thumbnails = [] - for key,image in info.get('content').get('images').get('wide').items(): + for id,url in thumbnails_list.items(): thumbnails.append({ - 'url': image, - 'id': key + 'url': url, + 'id': id }) subtitles = {} - for subtitle in info.get('content').get('subtitles'): + for subtitle in subtitles_list: lang = subtitle.get('language') sub_url = subtitle.get('url') subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ @@ -117,9 +127,9 @@ class PuhuTVIE(InfoExtractor): format_dict = self._download_json( 'https://puhutv.com/api/assets/%s/videos' % display_id, - video_id, 'Downloading sources').get('data').get('videos') + video_id, 'Downloading video JSON').get('data').get('videos') if not format_dict: - raise ExtractorError('This video not available in your country') + self.raise_geo_restricted() formats = [] for format in format_dict: @@ -156,17 +166,31 @@ class PuhuTVIE(InfoExtractor): } -class PuhuTVSeasonIE(InfoExtractor): +class PuhuTVSerieIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-detay' - IE_NAME = 'puhutv:season' - _TESTS = [{ - 'url': 'https://puhutv.com/deniz-yildizi-detay', - 'info_dict': { - 'title': 'Deniz Yıldızı', - 'id': 'deniz-yildizi', + IE_NAME = 'puhutv:serie' + _TESTS = [ + { + 'url': 'https://puhutv.com/deniz-yildizi-detay', + 'info_dict': { + 'title': 'Deniz Yıldızı', + 'id': 'deniz-yildizi', + 'uploader': 'Focus Film', + 'uploader_id': 61, + }, + 'playlist_mincount': 10, }, - 'playlist_mincount': 10, - }] + { # a film detail page which is using same url with serie page + 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', + 'info_dict': { + 'title': 'Kaybedenler Kulübü', + 'id': 'kaybedenler-kulubu', + 'uploader': 'Tolga Örnek, Murat Dörtbudak, Neslihan Dörtbudak, Kemal Kaplanoğlu', + 'uploader_id': 248, + }, + 'playlist_mincount': 1, + }, + ] def _extract_entries(self, playlist_id, seasons): for season in seasons: @@ -198,9 +222,17 @@ class PuhuTVSeasonIE(InfoExtractor): playlist_id).get('data') title = info.get('name') - uploader = info.get('producer').get('name') - uploader_id = info.get('producer').get('id') + uploader = info.get('producer', {}).get('name') + uploader_id = info.get('producer', {}).get('id') seasons = info.get('seasons') + if seasons: + entries = self._extract_entries(playlist_id, seasons) + else: + # For films, these are using same url with series + video_id = info.get('assets')[0].get('slug') + return self.url_result( + 'https://puhutv.com/%s-izle' % video_id, + PuhuTVIE.ie_key(), video_id) return { '_type': 'playlist', @@ -208,5 +240,5 @@ class PuhuTVSeasonIE(InfoExtractor): 'title': title, 'uploader': uploader, 'uploader_id': uploader_id, - 'entries': self._extract_entries(playlist_id, seasons), + 'entries': entries, } From 21c1f8d049495d611fddfb9ba955e2890763830d Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 1 May 2018 15:33:54 +0300 Subject: [PATCH 10/18] [puhutv] checked with flake8 and fixed --- youtube_dl/extractor/puhutv.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index d9cdb2a1d..6ed256c57 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, - ExtractorError, float_or_none, determine_ext ) @@ -14,7 +13,7 @@ from ..utils import ( class PuhuTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-izle' _TESTS = [ - { # A Film + { # A Film 'url': 'https://puhutv.com/sut-kardesler-izle', 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { @@ -29,7 +28,7 @@ class PuhuTVIE(InfoExtractor): 'upload_date': '20160729', }, }, - { # An Episode and geo restricted + { # An Episode and geo restricted 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', 'md5': '3cd1f4b931cff5e009dfa46a3b88a42a', 'info_dict': { @@ -44,7 +43,7 @@ class PuhuTVIE(InfoExtractor): 'upload_date': '20180220', }, }, - { # Has subtitle + { # Has subtitle 'url': 'https://puhutv.com/dip-1-bolum-izle', 'md5': 'f27792b1169f42ab318c38887ad5b28e', 'info_dict': { @@ -64,10 +63,10 @@ class PuhuTVIE(InfoExtractor): } ] IE_NAME = 'puhutv' - _SUBTITLE_LANGS = { # currently supported for some series - 'English':'en', - 'Deutsch':'de', - 'عربى':'ar' + _SUBTITLE_LANGS = { # currently supported for some series + 'English': 'en', + 'Deutsch': 'de', + 'عربى': 'ar' } def _real_extract(self, url): @@ -110,7 +109,7 @@ class PuhuTVIE(InfoExtractor): tags.append(tag.get('name')) thumbnails = [] - for id,url in thumbnails_list.items(): + for id, url in thumbnails_list.items(): thumbnails.append({ 'url': url, 'id': id @@ -136,7 +135,7 @@ class PuhuTVIE(InfoExtractor): media_url = format.get('url') ext = format.get('video_format') quality = format.get('quality') - if ext == 'mp4' and format.get('is_playlist') == False: + if ext == 'mp4' and format.get('is_playlist') is False: formats.append({ 'url': media_url, 'format_id': 'http-%s' % quality, @@ -180,7 +179,7 @@ class PuhuTVSerieIE(InfoExtractor): }, 'playlist_mincount': 10, }, - { # a film detail page which is using same url with serie page + { # a film detail page which is using same url with serie page 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', 'info_dict': { 'title': 'Kaybedenler Kulübü', @@ -198,14 +197,14 @@ class PuhuTVSerieIE(InfoExtractor): season_number = season.get('position') pagenum = 1 has_more = True - while has_more == True: + while has_more is True: query = { 'page': pagenum, 'per': 40, } season_info = self._download_json( 'https://galadriel.puhutv.com/seasons/%s' % season_id, - playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query) + playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query) for episode in season_info.get('episodes'): video_id = episode.get('slugPath').replace('-izle', '') yield self.url_result( @@ -219,7 +218,7 @@ class PuhuTVSerieIE(InfoExtractor): info = self._download_json( 'https://puhutv.com/api/slug/%s-detay' % playlist_id, - playlist_id).get('data') + playlist_id).get('data') title = info.get('name') uploader = info.get('producer', {}).get('name') From e257b8dc2dcf0616e0fba5a6c055725e4d1c2b51 Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 1 May 2018 20:58:03 +0300 Subject: [PATCH 11/18] [puhutv] removing fatal=false --- youtube_dl/extractor/puhutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 6ed256c57..6ed012e5e 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -74,7 +74,7 @@ class PuhuTVIE(InfoExtractor): # API call info = self._download_json( 'https://puhutv.com/api/slug/%s-izle' % video_id, - video_id, fatal=False) + video_id) info = info.get('data') display_id = compat_str(info.get('id')) From 70a61e595f9ac851c39f5958954065749206e2e5 Mon Sep 17 00:00:00 2001 From: Enes Date: Wed, 9 May 2018 17:27:14 +0300 Subject: [PATCH 12/18] [puhutv] improvement --- youtube_dl/extractor/puhutv.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 6ed012e5e..e0fad72ed 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -106,7 +106,8 @@ class PuhuTVIE(InfoExtractor): tags = [] for tag in tags_list: - tags.append(tag.get('name')) + if tag.get('name'): + tags.append(tag.get('name')) thumbnails = [] for id, url in thumbnails_list.items(): @@ -119,21 +120,27 @@ class PuhuTVIE(InfoExtractor): for subtitle in subtitles_list: lang = subtitle.get('language') sub_url = subtitle.get('url') + # If the keys were changed by api, continue + if not lang or not sub_url: + continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ 'url': sub_url, 'ext': determine_ext(sub_url) }] - format_dict = self._download_json( + # Some of videos are geo restricted upon request copyright owner and returns 403 + req_formats = self._download_json( 'https://puhutv.com/api/assets/%s/videos' % display_id, - video_id, 'Downloading video JSON').get('data').get('videos') - if not format_dict: + video_id, 'Downloading video JSON') + if not req_formats: self.raise_geo_restricted() + else: + format_dict = req_formats.get('data').get('videos') formats = [] for format in format_dict: media_url = format.get('url') - ext = format.get('video_format') + ext = format.get('video_format') or determine_ext(media_url) quality = format.get('quality') if ext == 'mp4' and format.get('is_playlist') is False: formats.append({ @@ -211,7 +218,7 @@ class PuhuTVSerieIE(InfoExtractor): 'https://puhutv.com/%s-izle' % video_id, PuhuTVIE.ie_key(), video_id) pagenum = pagenum + 1 - has_more = season_info.get('hasMore') + has_more = season_info.get('hasMore', False) def _real_extract(self, url): playlist_id = self._match_id(url) From a17685542deca8caab737643fecc3fa439e034c8 Mon Sep 17 00:00:00 2001 From: Enes Date: Thu, 17 May 2018 16:28:20 +0300 Subject: [PATCH 13/18] [puhutv] improvement --- youtube_dl/extractor/puhutv.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index e0fad72ed..540e94a00 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -6,7 +6,11 @@ from ..compat import compat_str from ..utils import ( int_or_none, float_or_none, - determine_ext + determine_ext, + parse_iso8601, + str_or_none, + unified_strdate, + urljoin ) @@ -73,26 +77,22 @@ class PuhuTVIE(InfoExtractor): video_id = self._match_id(url) # API call info = self._download_json( - 'https://puhutv.com/api/slug/%s-izle' % video_id, - video_id) + 'https://puhutv.com/api/slug/%s-izle' % video_id, video_id) info = info.get('data') display_id = compat_str(info.get('id')) - title = info.get('title', {}).get('name') + title = info.get('title').get('name') if(info.get('display_name') and title is not None): title += ' ' + info.get('display_name') description = info.get('title', {}).get('description') - upload_date = info.get('created_at', '').split('T')[0].replace('-', '') - if upload_date is '': - upload_date = None + timestamp = parse_iso8601(info.get('created_at')) + upload_date = unified_strdate(info.get('created_at')) uploader = info.get('title', {}).get('producer', {}).get('name') - uploader_id = info.get('title', {}).get('producer', {}).get('id') - if uploader_id is not None: - uploader_id = compat_str(uploader_id) + uploader_id = str_or_none(info.get('title', {}).get('producer', {}).get('id')) view_count = int_or_none(info.get('content', {}).get('watch_count')) duration = float_or_none(info.get('content', {}).get('duration_in_ms'), scale=1000) - thumbnail = 'https://%s' % info.get('content', {}).get('images', {}).get('wide', {}).get('main') + thumbnail = urljoin('https://', info.get('content', {}).get('images', {}).get('wide', {}).get('main')) release_year = int_or_none(info.get('title', {}).get('released_at')) webpage_url = info.get('web_url') tags_list = info.get('title', {}).get('genres', {}) @@ -111,6 +111,7 @@ class PuhuTVIE(InfoExtractor): thumbnails = [] for id, url in thumbnails_list.items(): + url = urljoin('https://', url) thumbnails.append({ 'url': url, 'id': id @@ -131,7 +132,7 @@ class PuhuTVIE(InfoExtractor): # Some of videos are geo restricted upon request copyright owner and returns 403 req_formats = self._download_json( 'https://puhutv.com/api/assets/%s/videos' % display_id, - video_id, 'Downloading video JSON') + video_id, 'Downloading video JSON', fatal=False) if not req_formats: self.raise_geo_restricted() else: @@ -142,7 +143,7 @@ class PuhuTVIE(InfoExtractor): media_url = format.get('url') ext = format.get('video_format') or determine_ext(media_url) quality = format.get('quality') - if ext == 'mp4' and format.get('is_playlist') is False: + if ext == 'mp4' and format.get('is_playlist', False) is False: formats.append({ 'url': media_url, 'format_id': 'http-%s' % quality, @@ -159,6 +160,7 @@ class PuhuTVIE(InfoExtractor): 'episode_number': episode_number, 'release_year': release_year, 'upload_date': upload_date, + 'timestamp': timestamp, 'uploader': uploader, 'uploader_id': uploader_id, 'view_count': view_count, @@ -184,7 +186,7 @@ class PuhuTVSerieIE(InfoExtractor): 'uploader': 'Focus Film', 'uploader_id': 61, }, - 'playlist_mincount': 10, + 'playlist_mincount': 234, }, { # a film detail page which is using same url with serie page 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', From f5f1ca888093842326aa38394d27d42f8ced3a45 Mon Sep 17 00:00:00 2001 From: Enes Date: Thu, 17 May 2018 16:50:33 +0300 Subject: [PATCH 14/18] [puhutv] added timestamp to test dict --- youtube_dl/extractor/puhutv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 540e94a00..3b8a2419c 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -30,6 +30,7 @@ class PuhuTVIE(InfoExtractor): 'description': 'md5:405fd024df916ca16731114eb18e511a', 'uploader_id': '43', 'upload_date': '20160729', + 'timestamp': int, }, }, { # An Episode and geo restricted @@ -45,6 +46,7 @@ class PuhuTVIE(InfoExtractor): 'description': 'md5:18ba5abe6d19f8063a8348445c41e28f', 'uploader_id': '269', 'upload_date': '20180220', + 'timestamp': int, }, }, { # Has subtitle @@ -60,6 +62,7 @@ class PuhuTVIE(InfoExtractor): 'description': 'md5:e8ddb56738b093b4eae0a536e2ea02c2', 'uploader_id': '25', 'upload_date': '20180330', + 'timestamp': int, }, 'params': { 'skip_download': True, From b521306bb57c820aa6f4eaeaa0ef60688dfb66a4 Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 5 Jun 2018 20:19:05 +0300 Subject: [PATCH 15/18] [puhutv] improvement --- youtube_dl/extractor/puhutv.py | 115 ++++++++++++++------------------- 1 file changed, 49 insertions(+), 66 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 3b8a2419c..72900215e 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -10,14 +10,18 @@ from ..utils import ( parse_iso8601, str_or_none, unified_strdate, - urljoin + urljoin, + url_basename, + remove_end ) class PuhuTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P[a-z0-9-]+)-izle' + IE_NAME = 'puhutv' _TESTS = [ - { # A Film + { + # A Film 'url': 'https://puhutv.com/sut-kardesler-izle', 'md5': 'a347470371d56e1585d1b2c8dab01c96', 'info_dict': { @@ -33,44 +37,18 @@ class PuhuTVIE(InfoExtractor): 'timestamp': int, }, }, - { # An Episode and geo restricted + { + # An Episode and geo restricted 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', - 'md5': '3cd1f4b931cff5e009dfa46a3b88a42a', - 'info_dict': { - 'id': 'jet-sosyete-1-bolum', - 'display_id': '18501', - 'ext': 'mp4', - 'title': 'Jet Sosyete 1. Sezon 1. Bölüm', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'BKM', - 'description': 'md5:18ba5abe6d19f8063a8348445c41e28f', - 'uploader_id': '269', - 'upload_date': '20180220', - 'timestamp': int, - }, + 'only_matching': True, }, - { # Has subtitle + { + # Has subtitle 'url': 'https://puhutv.com/dip-1-bolum-izle', - 'md5': 'f27792b1169f42ab318c38887ad5b28e', - 'info_dict': { - 'id': 'dip-1-bolum', - 'display_id': '18944', - 'ext': 'mp4', - 'title': 'Dip 1. Sezon 1. Bölüm', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'TMC', - 'description': 'md5:e8ddb56738b093b4eae0a536e2ea02c2', - 'uploader_id': '25', - 'upload_date': '20180330', - 'timestamp': int, - }, - 'params': { - 'skip_download': True, - } + 'only_matching': True, } ] - IE_NAME = 'puhutv' - _SUBTITLE_LANGS = { # currently supported for some series + _SUBTITLE_LANGS = { 'English': 'en', 'Deutsch': 'de', 'عربى': 'ar' @@ -78,17 +56,15 @@ class PuhuTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # API call info = self._download_json( - 'https://puhutv.com/api/slug/%s-izle' % video_id, video_id) + 'https://puhutv.com/api/slug/%s-izle' % video_id, video_id)['data'] - info = info.get('data') - display_id = compat_str(info.get('id')) - title = info.get('title').get('name') + display_id = compat_str(info['id']) + title = info['title']['name'] if(info.get('display_name') and title is not None): title += ' ' + info.get('display_name') - description = info.get('title', {}).get('description') + description = info.get('title', {}).get('description') or info.get('description') timestamp = parse_iso8601(info.get('created_at')) upload_date = unified_strdate(info.get('created_at')) uploader = info.get('title', {}).get('producer', {}).get('name') @@ -98,9 +74,6 @@ class PuhuTVIE(InfoExtractor): thumbnail = urljoin('https://', info.get('content', {}).get('images', {}).get('wide', {}).get('main')) release_year = int_or_none(info.get('title', {}).get('released_at')) webpage_url = info.get('web_url') - tags_list = info.get('title', {}).get('genres', {}) - thumbnails_list = info.get('content', {}).get('images', {}).get('wide', {}) - subtitles_list = info.get('content', {}).get('subtitles', {}) # for series season_number = int_or_none(info.get('season_number')) @@ -108,24 +81,29 @@ class PuhuTVIE(InfoExtractor): episode_number = int_or_none(info.get('episode_number')) tags = [] - for tag in tags_list: - if tag.get('name'): + for tag in info.get('title', {}).get('genres', {}): + if isinstance(tag.get('name'), compat_str): tags.append(tag.get('name')) thumbnails = [] - for id, url in thumbnails_list.items(): - url = urljoin('https://', url) - thumbnails.append({ - 'url': url, - 'id': id - }) + thumbs_dict = info.get('content', {}).get('images', {}).get('wide', {}) + if isinstance(thumbs_dict, dict): + for id, url in thumbs_dict.items(): + if not url or not isinstance(url, compat_str): + continue + url = urljoin('https://', url) + thumbnails.append({ + 'url': url, + 'id': id + }) subtitles = {} - for subtitle in subtitles_list: + for subtitle in info.get('content', {}).get('subtitles', {}): + if not isinstance(subtitle, dict): + continue lang = subtitle.get('language') sub_url = subtitle.get('url') - # If the keys were changed by api, continue - if not lang or not sub_url: + if not lang or not isinstance(lang, compat_str) or not sub_url or not isinstance(sub_url, compat_str): continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ 'url': sub_url, @@ -138,20 +116,25 @@ class PuhuTVIE(InfoExtractor): video_id, 'Downloading video JSON', fatal=False) if not req_formats: self.raise_geo_restricted() - else: - format_dict = req_formats.get('data').get('videos') formats = [] - for format in format_dict: + for format in req_formats['data']['videos']: media_url = format.get('url') + if not media_url or not isinstance(media_url, compat_str): + continue ext = format.get('video_format') or determine_ext(media_url) quality = format.get('quality') - if ext == 'mp4' and format.get('is_playlist', False) is False: + if format.get('stream_type') == 'hls' and format.get('is_playlist') is True: + m3u8_id = remove_end(url_basename(media_url), '.m3u8') + formats.append(self._m3u8_meta_format(media_url, ext, m3u8_id=m3u8_id)) + elif ext == 'mp4' and format.get('is_playlist', False) is False: formats.append({ 'url': media_url, 'format_id': 'http-%s' % quality, - 'ext': ext + 'ext': ext, + 'height': quality }) + self._sort_formats(formats) return { 'id': video_id, @@ -191,7 +174,8 @@ class PuhuTVSerieIE(InfoExtractor): }, 'playlist_mincount': 234, }, - { # a film detail page which is using same url with serie page + { + # a film detail page which is using same url with serie page 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', 'info_dict': { 'title': 'Kaybedenler Kulübü', @@ -205,7 +189,7 @@ class PuhuTVSerieIE(InfoExtractor): def _extract_entries(self, playlist_id, seasons): for season in seasons: - season_id = season.get('id') + season_id = season['id'] season_number = season.get('position') pagenum = 1 has_more = True @@ -218,7 +202,7 @@ class PuhuTVSerieIE(InfoExtractor): 'https://galadriel.puhutv.com/seasons/%s' % season_id, playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query) for episode in season_info.get('episodes'): - video_id = episode.get('slugPath').replace('-izle', '') + video_id = episode['slugPath'].replace('-izle', '') yield self.url_result( 'https://puhutv.com/%s-izle' % video_id, PuhuTVIE.ie_key(), video_id) @@ -229,8 +213,7 @@ class PuhuTVSerieIE(InfoExtractor): playlist_id = self._match_id(url) info = self._download_json( - 'https://puhutv.com/api/slug/%s-detay' % playlist_id, - playlist_id).get('data') + 'https://puhutv.com/api/slug/%s-detay' % playlist_id, playlist_id)['data'] title = info.get('name') uploader = info.get('producer', {}).get('name') @@ -240,7 +223,7 @@ class PuhuTVSerieIE(InfoExtractor): entries = self._extract_entries(playlist_id, seasons) else: # For films, these are using same url with series - video_id = info.get('assets')[0].get('slug') + video_id = info['assets'][0]['slug'] return self.url_result( 'https://puhutv.com/%s-izle' % video_id, PuhuTVIE.ie_key(), video_id) From 09fa98fa4c4c43efe7d6f01415e37888779bbe4e Mon Sep 17 00:00:00 2001 From: Enes Date: Mon, 11 Jun 2018 23:14:32 +0300 Subject: [PATCH 16/18] [puhutv] fixed as requested --- youtube_dl/extractor/puhutv.py | 67 +++++++++++++++------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 72900215e..86ed4d3ef 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -7,10 +7,10 @@ from ..utils import ( int_or_none, float_or_none, determine_ext, - parse_iso8601, str_or_none, unified_strdate, - urljoin, + unified_timestamp, + try_get, url_basename, remove_end ) @@ -61,44 +61,41 @@ class PuhuTVIE(InfoExtractor): display_id = compat_str(info['id']) title = info['title']['name'] - if(info.get('display_name') and title is not None): - title += ' ' + info.get('display_name') + if isinstance(info.get('display_name'), compat_str): + title = '%s %s' % (title, info.get('display_name')) - description = info.get('title', {}).get('description') or info.get('description') - timestamp = parse_iso8601(info.get('created_at')) + description = try_get(info, lambda x: x['title']['description'], compat_str) or info.get('description') + timestamp = unified_timestamp(info.get('created_at')) upload_date = unified_strdate(info.get('created_at')) - uploader = info.get('title', {}).get('producer', {}).get('name') - uploader_id = str_or_none(info.get('title', {}).get('producer', {}).get('id')) - view_count = int_or_none(info.get('content', {}).get('watch_count')) - duration = float_or_none(info.get('content', {}).get('duration_in_ms'), scale=1000) - thumbnail = urljoin('https://', info.get('content', {}).get('images', {}).get('wide', {}).get('main')) - release_year = int_or_none(info.get('title', {}).get('released_at')) + uploader = try_get(info, lambda x: x['title']['producer']['name'], compat_str) + uploader_id = str_or_none(try_get(info, lambda x: x['title']['producer']['id'])) + view_count = int_or_none(try_get(info, lambda x: x['content']['watch_count'])) + duration = float_or_none(try_get(info, lambda x: x['content']['duration_in_ms']), scale=1000) + thumbnail = try_get(info, lambda x: x['content']['images']['wide']['main'], compat_str) + release_year = int_or_none(try_get(info, lambda x: x['title']['released_at'])) webpage_url = info.get('web_url') - # for series season_number = int_or_none(info.get('season_number')) season_id = int_or_none(info.get('season_id')) episode_number = int_or_none(info.get('episode_number')) tags = [] - for tag in info.get('title', {}).get('genres', {}): + for tag in try_get(info, lambda x: x['title']['genres'], list) or []: if isinstance(tag.get('name'), compat_str): tags.append(tag.get('name')) thumbnails = [] - thumbs_dict = info.get('content', {}).get('images', {}).get('wide', {}) - if isinstance(thumbs_dict, dict): - for id, url in thumbs_dict.items(): - if not url or not isinstance(url, compat_str): - continue - url = urljoin('https://', url) - thumbnails.append({ - 'url': url, - 'id': id - }) + thumbs_dict = try_get(info, lambda x: x['content']['images']['wide'], dict) or {} + for id, url in thumbs_dict.items(): + if not url or not isinstance(url, compat_str): + continue + thumbnails.append({ + 'url': 'https://%s' % url, + 'id': id + }) subtitles = {} - for subtitle in info.get('content', {}).get('subtitles', {}): + for subtitle in try_get(info, lambda x: x['content']['subtitles'], list) or []: if not isinstance(subtitle, dict): continue lang = subtitle.get('language') @@ -106,16 +103,13 @@ class PuhuTVIE(InfoExtractor): if not lang or not isinstance(lang, compat_str) or not sub_url or not isinstance(sub_url, compat_str): continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ - 'url': sub_url, - 'ext': determine_ext(sub_url) + 'url': sub_url }] # Some of videos are geo restricted upon request copyright owner and returns 403 req_formats = self._download_json( 'https://puhutv.com/api/assets/%s/videos' % display_id, - video_id, 'Downloading video JSON', fatal=False) - if not req_formats: - self.raise_geo_restricted() + video_id, 'Downloading video JSON') formats = [] for format in req_formats['data']['videos']: @@ -194,13 +188,12 @@ class PuhuTVSerieIE(InfoExtractor): pagenum = 1 has_more = True while has_more is True: - query = { - 'page': pagenum, - 'per': 40, - } season_info = self._download_json( 'https://galadriel.puhutv.com/seasons/%s' % season_id, - playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query=query) + playlist_id, 'Downloading season %s page %s' % (season_number, pagenum), query={ + 'page': pagenum, + 'per': 40, + }) for episode in season_info.get('episodes'): video_id = episode['slugPath'].replace('-izle', '') yield self.url_result( @@ -216,8 +209,8 @@ class PuhuTVSerieIE(InfoExtractor): 'https://puhutv.com/api/slug/%s-detay' % playlist_id, playlist_id)['data'] title = info.get('name') - uploader = info.get('producer', {}).get('name') - uploader_id = info.get('producer', {}).get('id') + uploader = try_get(info, lambda x: x['producer']['name'], compat_str) + uploader_id = try_get(info, lambda x: x['producer']['id']) seasons = info.get('seasons') if seasons: entries = self._extract_entries(playlist_id, seasons) From 681ab939b269d06dc32554aced509ccdf43c1600 Mon Sep 17 00:00:00 2001 From: Enes Date: Tue, 12 Jun 2018 00:54:57 +0300 Subject: [PATCH 17/18] [puhutv] fix title extraction --- youtube_dl/extractor/puhutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index 86ed4d3ef..df602d290 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -61,7 +61,7 @@ class PuhuTVIE(InfoExtractor): display_id = compat_str(info['id']) title = info['title']['name'] - if isinstance(info.get('display_name'), compat_str): + if info.get('display_name'): title = '%s %s' % (title, info.get('display_name')) description = try_get(info, lambda x: x['title']['description'], compat_str) or info.get('description') From 330751897f32554f7ead19e736a5580aee3bddcb Mon Sep 17 00:00:00 2001 From: Enes Date: Sun, 22 Jul 2018 00:29:17 +0300 Subject: [PATCH 18/18] [puhutv] update for url_or_none --- youtube_dl/extractor/puhutv.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py index df602d290..8abdab52a 100644 --- a/youtube_dl/extractor/puhutv.py +++ b/youtube_dl/extractor/puhutv.py @@ -8,6 +8,7 @@ from ..utils import ( float_or_none, determine_ext, str_or_none, + url_or_none, unified_strdate, unified_timestamp, try_get, @@ -87,7 +88,7 @@ class PuhuTVIE(InfoExtractor): thumbnails = [] thumbs_dict = try_get(info, lambda x: x['content']['images']['wide'], dict) or {} for id, url in thumbs_dict.items(): - if not url or not isinstance(url, compat_str): + if not url_or_none(url): continue thumbnails.append({ 'url': 'https://%s' % url, @@ -99,8 +100,8 @@ class PuhuTVIE(InfoExtractor): if not isinstance(subtitle, dict): continue lang = subtitle.get('language') - sub_url = subtitle.get('url') - if not lang or not isinstance(lang, compat_str) or not sub_url or not isinstance(sub_url, compat_str): + sub_url = url_or_none(subtitle.get('url')) + if not lang or not isinstance(lang, compat_str) or not sub_url: continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ 'url': sub_url @@ -113,8 +114,8 @@ class PuhuTVIE(InfoExtractor): formats = [] for format in req_formats['data']['videos']: - media_url = format.get('url') - if not media_url or not isinstance(media_url, compat_str): + media_url = url_or_none(format.get('url')) + if not media_url: continue ext = format.get('video_format') or determine_ext(media_url) quality = format.get('quality')