From 8cd809fb3dc446ec06afa2c1d398c9fe31a435cc Mon Sep 17 00:00:00 2001 From: Jan 'Yenda' Trmal Date: Fri, 10 Jan 2020 10:14:18 +0100 Subject: [PATCH 1/4] [nova:embed] Fix extraction (closes #23672) --- youtube_dl/extractor/nova.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 901f44b54..aefd8eab1 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -43,6 +43,13 @@ class NovaEmbedIE(InfoExtractor): formats = [] for format_id, format_list in bitrates.items(): + if format_id == 'hls': + m3u8_url = url_or_none(format_list) + if not m3u8_url: + continue + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) + if not isinstance(format_list, list): continue for format_url in format_list: From b68a6e32fb01717b24fd2201c3e1a5611fd1c963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Feb 2020 02:00:26 +0700 Subject: [PATCH 2/4] [nova:embed] Improve (closes #23690) --- youtube_dl/extractor/nova.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index aefd8eab1..62d0552e9 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -18,7 +18,7 @@ class NovaEmbedIE(InfoExtractor): _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P[^/?#&]+)' _TEST = { 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1', - 'md5': 'b3834f6de5401baabf31ed57456463f7', + 'md5': 'ee009bafcc794541570edd44b71cbea3', 'info_dict': { 'id': '8o0n0r', 'ext': 'mp4', @@ -43,19 +43,18 @@ class NovaEmbedIE(InfoExtractor): formats = [] for format_id, format_list in bitrates.items(): - if format_id == 'hls': - m3u8_url = url_or_none(format_list) - if not m3u8_url: - continue - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) - if not isinstance(format_list, list): - continue + format_list = [format_list] for format_url in format_list: format_url = url_or_none(format_url) if not format_url: continue + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + continue f = { 'url': format_url, } From 06f1de2daff8351f572974dafccebefd378b9f99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Feb 2020 02:16:26 +0700 Subject: [PATCH 3/4] [nova] Improve extraction (refs #23690) --- youtube_dl/extractor/nova.py | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 62d0552e9..2850af5db 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -97,7 +97,7 @@ class NovaIE(InfoExtractor): _VALID_URL = r'https?://(?:[^.]+\.)?(?Ptv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P[^/]+?)(?:\.html|/|$)' _TESTS = [{ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', - 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', + 'md5': '249baab7d0104e186e78b0899c7d5f28', 'info_dict': { 'id': '1757139', 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', @@ -119,7 +119,8 @@ class NovaIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, + 'skip': 'gone', }, { # media.cms.nova.cz embed 'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil', @@ -134,6 +135,7 @@ class NovaIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [NovaEmbedIE.ie_key()], + 'skip': 'CHYBA 404: STRÁNKA NENALEZENA', }, { 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', 'only_matching': True, @@ -158,14 +160,29 @@ class NovaIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + description = clean_html(self._og_search_description(webpage, default=None)) + if site == 'novaplus': + upload_date = unified_strdate(self._search_regex( + r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None)) + elif site == 'fanda': + upload_date = unified_strdate(self._search_regex( + r'(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None)) + else: + upload_date = None + # novaplus embed_id = self._search_regex( r']+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', webpage, 'embed url', default=None) if embed_id: - return self.url_result( - 'https://media.cms.nova.cz/embed/%s' % embed_id, - ie=NovaEmbedIE.ie_key(), video_id=embed_id) + return { + '_type': 'url_transparent', + 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id, + 'ie_key': NovaEmbedIE.ie_key(), + 'id': embed_id, + 'description': description, + 'upload_date': upload_date + } video_id = self._search_regex( [r"(?:media|video_id)\s*:\s*'(\d+)'", @@ -239,18 +256,8 @@ class NovaIE(InfoExtractor): self._sort_formats(formats) title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) - description = clean_html(self._og_search_description(webpage, default=None)) thumbnail = config.get('poster') - if site == 'novaplus': - upload_date = unified_strdate(self._search_regex( - r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None)) - elif site == 'fanda': - upload_date = unified_strdate(self._search_regex( - r'(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None)) - else: - upload_date = None - return { 'id': video_id, 'display_id': display_id, From 293c9f0186334fe8475f67508a8e156fb8b64e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Feb 2020 02:23:29 +0700 Subject: [PATCH 4/4] [jpopsuki] Remove extractor (closes #23858) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/jpopsukitv.py | 68 ------------------------------ 2 files changed, 69 deletions(-) delete mode 100644 youtube_dl/extractor/jpopsukitv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a202a0449..64d1fa251 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -497,7 +497,6 @@ from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE -from .jpopsukitv import JpopsukiIE from .kakao import KakaoIE from .kaltura import KalturaIE from .kanalplay import KanalPlayIE diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py deleted file mode 100644 index 4b5f346d1..000000000 --- a/youtube_dl/extractor/jpopsukitv.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, -) - - -class JpopsukiIE(InfoExtractor): - IE_NAME = 'jpopsuki.tv' - _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P\S+)' - - _TEST = { - 'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771', - 'md5': '88018c0c1a9b1387940e90ec9e7e198e', - 'info_dict': { - 'id': '00be659d23b0b40508169cdee4545771', - 'ext': 'mp4', - 'title': 'ayumi hamasaki - evolution', - 'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution', - 'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg', - 'uploader': 'plama_chan', - 'uploader_id': '404', - 'upload_date': '20121101' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_url = 'http://www.jpopsuki.tv' + self._html_search_regex( - r'from: uploaded: (.*?)', webpage, 'video upload_date', - fatal=False)) - view_count_str = self._html_search_regex( - r'
  • Hits: ([0-9]+?)
  • ', webpage, 'video view_count', - fatal=False) - comment_count_str = self._html_search_regex( - r'

    ([0-9]+?) comments

    ', webpage, 'video comment_count', - fatal=False) - - return { - 'id': video_id, - 'url': video_url, - 'title': video_title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'upload_date': upload_date, - 'view_count': int_or_none(view_count_str), - 'comment_count': int_or_none(comment_count_str), - }