diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index b54605ba0..d6ea8bf79 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -565,7 +565,7 @@ class YahooGyaOIE(InfoExtractor):
 class YahooJapanNewsIE(InfoExtractor):
     IE_NAME = 'yahoo:japannews'
     IE_DESC = 'Yahoo! Japan News'
-    _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)(/[^\d]*(?P<id>\d[\d-]*\d))?'
+    _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
     _TESTS = [{
         'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
         'info_dict': {
@@ -610,10 +610,8 @@ class YahooJapanNewsIE(InfoExtractor):
                 formats.append({
                     'url': url,
                     'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
-                    'ext': determine_ext(url),
                     'height': int_or_none(vid.get('height')),
-                    'width': int_or_none(vid.get('width')),
-                    'btr': int_or_none(vid.get('bitrate'))
+                    'width': int_or_none(vid.get('width'))
                 })
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
@@ -630,18 +628,6 @@ class YahooJapanNewsIE(InfoExtractor):
         title = self._html_search_meta(
             ['og:title', 'twitter:title'], webpage, 'title', default=None
         ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
-        description = self._html_search_meta([
-            'og:description', 'description', 'twitter:description'
-        ], webpage, 'description', default=None)
-        thumbnail = self._og_search_thumbnail(
-            webpage, default=None
-        ) or self._html_search_meta('twitter:image', webpage, 'thumbnail', default=None)
-        space_id = self._search_regex([
-            r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
-            r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
-            r'