[sportbox] Improve extraction, add support for matchtv.ru and fix video id (closes #17978)
This commit is contained in:
parent bebef10909
commit 476cf548e1
				| @ -1043,7 +1043,7 @@ from .spike import ( | |||||||
| ) | ) | ||||||
| from .stitcher import StitcherIE | from .stitcher import StitcherIE | ||||||
| from .sport5 import Sport5IE | from .sport5 import Sport5IE | ||||||
| from .sportbox import SportBoxEmbedIE | from .sportbox import SportBoxIE | ||||||
| from .sportdeutschland import SportDeutschlandIE | from .sportdeutschland import SportDeutschlandIE | ||||||
| from .springboardplatform import SpringboardPlatformIE | from .springboardplatform import SpringboardPlatformIE | ||||||
| from .sprout import SproutIE | from .sprout import SproutIE | ||||||
|  | |||||||
| @ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE | |||||||
| from .ooyala import OoyalaIE | from .ooyala import OoyalaIE | ||||||
| from .rutv import RUTVIE | from .rutv import RUTVIE | ||||||
| from .tvc import TVCIE | from .tvc import TVCIE | ||||||
| from .sportbox import SportBoxEmbedIE | from .sportbox import SportBoxIE | ||||||
| from .smotri import SmotriIE | from .smotri import SmotriIE | ||||||
| from .myvi import MyviIE | from .myvi import MyviIE | ||||||
| from .condenast import CondeNastIE | from .condenast import CondeNastIE | ||||||
| @ -2636,9 +2636,9 @@ class GenericIE(InfoExtractor): | |||||||
|             return self.url_result(tvc_url, 'TVC') |             return self.url_result(tvc_url, 'TVC') | ||||||
| 
 | 
 | ||||||
|         # Look for embedded SportBox player |         # Look for embedded SportBox player | ||||||
|         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) |         sportbox_urls = SportBoxIE._extract_urls(webpage) | ||||||
|         if sportbox_urls: |         if sportbox_urls: | ||||||
|             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') |             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) | ||||||
| 
 | 
 | ||||||
|         # Look for embedded XHamster player |         # Look for embedded XHamster player | ||||||
|         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) |         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) | ||||||
|  | |||||||
| @ -8,20 +8,24 @@ from ..utils import ( | |||||||
|     determine_ext, |     determine_ext, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     js_to_json, |     js_to_json, | ||||||
|  |     merge_dicts, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class SportBoxEmbedIE(InfoExtractor): | class SportBoxIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' |     _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://news.sportbox.ru/vdl/player/ci/211355', |         'url': 'http://news.sportbox.ru/vdl/player/ci/211355', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '211355', |             'id': '109158', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', |             'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', | ||||||
|  |             'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'duration': 292, |             'duration': 292, | ||||||
|             'view_count': int, |             'view_count': int, | ||||||
|  |             'timestamp': 1426237001, | ||||||
|  |             'upload_date': '20150313', | ||||||
|         }, |         }, | ||||||
|         'params': { |         'params': { | ||||||
|             # m3u8 download |             # m3u8 download | ||||||
| @ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'https://news.sportbox.ru/vdl/player/media/193095', |         'url': 'https://news.sportbox.ru/vdl/player/media/193095', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://news.sportbox.ru/vdl/player/media/109158', | ||||||
|  |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'https://matchtv.ru/vdl/player/media/109158', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def _extract_urls(webpage): |     def _extract_urls(webpage): | ||||||
|         return re.findall( |         return re.findall( | ||||||
|             r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', |             r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', | ||||||
|             webpage) |             webpage) | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
| @ -46,22 +56,14 @@ class SportBoxEmbedIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
| 
 | 
 | ||||||
|         wjplayer_data = self._parse_json( |         sources = self._parse_json( | ||||||
|             self._search_regex( |             self._search_regex( | ||||||
|                 r'(?s)var\s+playerOptions\s*=\s*({.+?});', webpage, 'wjplayer settings'), |                 r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', | ||||||
|  |                 webpage, 'sources'), | ||||||
|             video_id, transform_source=js_to_json) |             video_id, transform_source=js_to_json) | ||||||
| 
 | 
 | ||||||
|         wjplayer_data['sources'] = self._parse_json( |  | ||||||
|             self._search_regex( |  | ||||||
|                 r'(?s)playerOptions\.sources\s*=\s*(\[.+?\]);', webpage, 'wjplayer sources'), |  | ||||||
|             video_id, transform_source=js_to_json) |  | ||||||
| 
 |  | ||||||
|         title = self._html_search_meta( |  | ||||||
|             ['og:title', 'twitter:title'], webpage) or self._html_search_regex( |  | ||||||
|             r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_id |  | ||||||
| 
 |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for source in wjplayer_data['sources']: |         for source in sources: | ||||||
|             src = source.get('src') |             src = source.get('src') | ||||||
|             if not src: |             if not src: | ||||||
|                 continue |                 continue | ||||||
| @ -75,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor): | |||||||
|                 }) |                 }) | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
| 
 | 
 | ||||||
|  |         player = self._parse_json( | ||||||
|  |             self._search_regex( | ||||||
|  |                 r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage, | ||||||
|  |                 'player options', default='{}'), | ||||||
|  |             video_id, transform_source=js_to_json) | ||||||
|  |         media_id = player['mediaId'] | ||||||
|  | 
 | ||||||
|  |         info = self._search_json_ld(webpage, media_id, default={}) | ||||||
|  | 
 | ||||||
|         view_count = int_or_none(self._search_regex( |         view_count = int_or_none(self._search_regex( | ||||||
|             r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) |             r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) | ||||||
| 
 | 
 | ||||||
|         return { |         return merge_dicts(info, { | ||||||
|             'id': video_id, |             'id': media_id, | ||||||
|             'title': title, |             'title': self._og_search_title(webpage, default=None) or media_id, | ||||||
|             'thumbnail': wjplayer_data.get('poster'), |             'thumbnail': player.get('poster'), | ||||||
|             'duration': int_or_none(wjplayer_data.get('duration')), |             'duration': int_or_none(player.get('duration')), | ||||||
|             'view_count': view_count, |             'view_count': view_count, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|         } |         }) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user