From 0f726bc002025422fea8c0c55f5ecca02be83894 Mon Sep 17 00:00:00 2001
From: kosantosbik
Date: Fri, 3 May 2019 18:19:30 +0300
Subject: [PATCH] [kanald] Add support for new site

---
Review notes (not part of the commit message):
- extractors.py: the new import is appended after the zype entry, while the
  visible neighbours are in alphabetical order; it probably belongs next to
  the other "k" extractors. The hunk is left unchanged because that context
  is not visible here.
- kanald.py: the <h1> title selector, the og:image thumbnail and the "next"
  link markup are assumptions about the page HTML and must be checked
  against the live site before merging.

 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/kanald.py     | 129 +++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)
 create mode 100644 youtube_dl/extractor/kanald.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 0e3ccb82d..8eedfb7a2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1524,3 +1524,4 @@ from .zattoo import (
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
 from .zype import ZypeIE
+from .kanald import KanaldIE, KanaldSerieIE
diff --git a/youtube_dl/extractor/kanald.py b/youtube_dl/extractor/kanald.py
new file mode 100644
index 000000000..4bbe4a98e
--- /dev/null
+++ b/youtube_dl/extractor/kanald.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import urljoin
+
+
+class KanaldIE(InfoExtractor):
+    """Extract a single episode page of kanald.com.tr."""
+
+    IE_NAME = 'kanald'
+    IE_DESC = 'Kanal D'
+    _VALID_URL = r'https?://(?:www\.)?kanald\.com\.tr/(?:.*)/(?P<id>.*\d+.*bolum(?!ler).*)/?'
+    # Pages expose only a relative stream path in "contentUrl"; it is
+    # resolved against this host.
+    # NOTE(review): a numbered edge host looks fragile - confirm it is stable.
+    _STREAM_BASE = 'https://soledge13.dogannet.tv/'
+    _TESTS = [{
+        'url': 'https://www.kanald.com.tr/kuzeyguney/1-bolum/10115',
+        'info_dict': {
+            'id': '1-bolum/10115',
+            'ext': 'mp4',
+            'title': '1.Bölüm',
+            'release_date': '20110907',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Kanal D',
+            'description': '1.Bölüm',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.kanald.com.tr/sevdanin-bahcesi/bolumler/sevdanin-bahcesi-2-bolum',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.kanald.com.tr/yarim-elma/bolum/yarim-elma-36-bolum',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.kanald.com.tr/ask-ve-gunah/bolumler/ask-ve-gunah-120-bolum-final',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict (HLS formats plus metadata) for one episode."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # NOTE(review): the heading selector is a guess at the page markup;
+        # og:title is the fallback when it does not match.
+        title = self._html_search_regex(
+            r'<h1[^>]*>\s*(.+?)\s*</h1>', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
+
+        # Stop at the closing quote so trailing JSON is never captured.
+        stream_path = self._search_regex(
+            r'["\']contentUrl["\']\s*:\s*["\']([^"\']+)', webpage,
+            'video URL')
+        formats = self._extract_m3u8_formats(
+            urljoin(self._STREAM_BASE, stream_path), video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        # Optional metadata must not abort the extraction when missing.
+        upload_date = self._search_regex(
+            r'["\']uploadDate["\']\s*:\s*["\'](\d{4}-\d{2}-\d{2})', webpage,
+            'upload date', default=None)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage),
+            'uploader': 'Kanal D',
+            'release_date': upload_date.replace('-', '') if upload_date else None,
+        }
+
+
+class KanaldSerieIE(InfoExtractor):
+    """Extract all episodes of a kanald.com.tr series as a playlist."""
+
+    IE_NAME = 'kanald:serie'
+    _VALID_URL = r'https?://(?:www\.)?kanald\.com\.tr/(?P<id>[^/?#]+)/bolum(?:ler)?/?(?:[?#].*)?$'
+    _TESTS = [{
+        'url': 'https://www.kanald.com.tr/kuzeyguney/bolum',
+        'info_dict': {
+            'id': 'kuzeyguney',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'https://www.kanald.com.tr/iki-yalanci/bolumler',
+        'only_matching': True,
+    }]
+
+    def _entries(self, url, playlist_id):
+        """Yield a url_result per episode link, following "next" pages."""
+        # Markup-independent: take every link below the series path and
+        # keep those that KanaldIE recognises as an episode.
+        episode_re = r'href=["\'](/%s/[^"\'?#]+)' % re.escape(playlist_id)
+        seen_paths = set()
+        visited_pages = set()
+        page_url = url
+        for page_num in itertools.count(1):
+            visited_pages.add(page_url)
+            webpage = self._download_webpage(
+                page_url, playlist_id, note='Downloading page %d' % page_num)
+            for path in re.findall(episode_re, webpage):
+                episode_url = urljoin(url, path)
+                if path in seen_paths or not KanaldIE.suitable(episode_url):
+                    continue
+                seen_paths.add(path)
+                yield self.url_result(episode_url, ie=KanaldIE.ie_key())
+            next_path = self._search_regex(
+                r'class=["\']next["\']>\s*<a[^>]+href=["\']([^"\']+)',
+                webpage, 'next page', default=None)
+            page_url = urljoin(url, next_path)
+            # No "next" link, or one that loops back, ends the pagination.
+            if not page_url or page_url in visited_pages:
+                break
+
+    def _real_extract(self, url):
+        """Return a playlist of every episode linked from the series listing."""
+        playlist_id = self._match_id(url)
+        return self.playlist_result(
+            self._entries(url, playlist_id), playlist_id)