From fcaf4d7a067e9ac6b36a5f7b1100be391f492a79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2020 04:39:21 +0700 Subject: [PATCH 1/4] [nhk] Relax _VALID_URL (#24329) --- youtube_dl/extractor/nhk.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index d2cbc9f54..45bc4d85e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P[a-z]{2})/ondemand/(?Pvideo|audio)/(?P\d{7}|[a-z]+-\d{8}-\d+)' + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P[a-z]{2})/ondemand/(?Pvideo|audio)/(?P\d{7}|[^/]+?-\d{8}-\d+)' # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. _TESTS = [{ @@ -30,6 +30,9 @@ class NhkVodIE(InfoExtractor): }, { 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'only_matching': True, + }, { + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', + 'only_matching': True, }] _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json' From 9bfe08859491882e40603ed6c4eb59760ed35ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2020 04:40:11 +0700 Subject: [PATCH 2/4] [nhk] Remove obsolete rtmp formats (closes #24329) --- youtube_dl/extractor/nhk.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 45bc4d85e..ce13f6bb9 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -87,13 +87,6 @@ class NhkVodIE(InfoExtractor): info['formats'] = self._extract_m3u8_formats( 'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path, episode_id, 'm4a', m3u8_id='hls', fatal=False) - for proto in ('rtmpt', 'rtmp'): - info['formats'].append({ - 'ext': 'flv', - 'format_id': proto, - 'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path), - 'vcodec': 'none', - }) for f in info['formats']: f['language'] = lang return info From 541fe3eaff579f72ccc14f97009a8904f739368f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2020 04:42:40 +0700 Subject: [PATCH 3/4] [nhk] Update m3u8 URL and use native hls (#24329) --- youtube_dl/extractor/nhk.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index ce13f6bb9..de6a707c4 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -85,8 +85,9 @@ class NhkVodIE(InfoExtractor): audio = episode['audio'] audio_path = audio['audio'] info['formats'] = self._extract_m3u8_formats( - 'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path, - episode_id, 'm4a', m3u8_id='hls', fatal=False) + 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, + episode_id, 'm4a', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) for f in info['formats']: f['language'] = lang return info From 4cbce88f8b44ab17d55fe1b7615e37a2c1f142d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2020 04:58:24 +0700 Subject: [PATCH 4/4] [ndr] Fix extraction (closes #24326) --- youtube_dl/extractor/ndr.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 9c8bf05af..2447c812e 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( determine_ext, int_or_none, + merge_dicts, parse_iso8601, qualities, try_get, @@ -87,21 +88,25 @@ class NDRIE(NDRBaseIE): def _extract_embed(self, webpage, display_id): embed_url = self._html_search_meta( - 'embedURL', webpage, 'embed URL', fatal=True) + 'embedURL', webpage, 'embed URL', + default=None) or self._search_regex( + r'\bembedUrl["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'embed URL', group='url') description = self._search_regex( r']+itemprop="description">([^<]+)

', webpage, 'description', default=None) or self._og_search_description(webpage) timestamp = parse_iso8601( self._search_regex( r']+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"', - webpage, 'upload date', fatal=False)) - return { + webpage, 'upload date', default=None)) + info = self._search_json_ld(webpage, display_id, default={}) + return merge_dicts({ '_type': 'url_transparent', 'url': embed_url, 'display_id': display_id, 'description': description, 'timestamp': timestamp, - } + }, info) class NJoyIE(NDRBaseIE):