[r7] Fix extraction and add support for articles (Closes #9826)
This commit is contained in:
		
							parent
							
								
									cb23192bc4
								
							
						
					
					
						commit
						7577d849a6
					
				| @ -631,7 +631,10 @@ from .qqmusic import ( | ||||
|     QQMusicToplistIE, | ||||
|     QQMusicPlaylistIE, | ||||
| ) | ||||
| from .r7 import R7IE | ||||
| from .r7 import ( | ||||
|     R7IE, | ||||
|     R7ArticleIE, | ||||
| ) | ||||
| from .radiocanada import ( | ||||
|     RadioCanadaIE, | ||||
|     RadioCanadaAudioVideoIE, | ||||
|  | ||||
| @ -2,15 +2,12 @@ | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import int_or_none | ||||
| 
 | ||||
| 
 | ||||
| class R7IE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         https?:// | ||||
|                         (?: | ||||
|                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| | ||||
|                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-| | ||||
| @ -25,6 +22,7 @@ class R7IE(InfoExtractor): | ||||
|             'id': '54e7050b0cf2ff57e0279389', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', | ||||
|             'description': 'md5:01812008664be76a6479aa58ec865b72', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 98, | ||||
|             'like_count': int, | ||||
| @ -44,45 +42,72 @@ class R7IE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| 
 | ||||
|         webpage = self._download_webpage( | ||||
|             'http://player.r7.com/video/i/%s' % video_id, video_id) | ||||
|         video = self._download_json( | ||||
|             'http://player-api.r7.com/video/i/%s' % video_id, video_id) | ||||
| 
 | ||||
|         item = self._parse_json(js_to_json(self._search_regex( | ||||
|             r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) | ||||
| 
 | ||||
|         title = unescapeHTML(item['title']) | ||||
|         thumbnail = item.get('init', {}).get('thumbUri') | ||||
|         duration = None | ||||
| 
 | ||||
|         statistics = item.get('statistics', {}) | ||||
|         like_count = int_or_none(statistics.get('likes')) | ||||
|         view_count = int_or_none(statistics.get('views')) | ||||
|         title = video['title'] | ||||
| 
 | ||||
|         formats = [] | ||||
|         for format_key, format_dict in item['playlist'][0].items(): | ||||
|             src = format_dict.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             format_id = format_dict.get('format') or format_key | ||||
|             if duration is None: | ||||
|                 duration = format_dict.get('duration') | ||||
|             if '.f4m' in src: | ||||
|                 formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) | ||||
|             elif src.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|         media_url_hls = video.get('media_url_hls') | ||||
|         if media_url_hls: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|         media_url = video.get('media_url') | ||||
|         if media_url: | ||||
|             f = { | ||||
|                 'url': media_url, | ||||
|                 'format_id': 'http', | ||||
|             } | ||||
|             # The m3u8 format always matches the http format, so copy the | ||||
|             # m3u8 format's metadata over to the http format | ||||
|             m3u8_formats = list(filter( | ||||
|                 lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                 formats)) | ||||
|             if len(m3u8_formats) == 1: | ||||
|                 f_copy = m3u8_formats[0].copy() | ||||
|                 f_copy.update(f) | ||||
|                 f_copy['protocol'] = 'http' | ||||
|                 f = f_copy | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
| 
 | ||||
|         description = video.get('description') | ||||
|         thumbnail = video.get('thumb') | ||||
|         duration = int_or_none(video.get('media_duration')) | ||||
|         like_count = int_or_none(video.get('likes')) | ||||
|         view_count = int_or_none(video.get('views')) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
class R7ArticleIE(InfoExtractor):
    """Resolve an r7.com article URL to the player video referenced on it.

    The article page is downloaded and searched for a ``<div>`` carrying a
    24-character hexadecimal id; the result is handed over to ``R7IE`` via
    a ``player.r7.com`` URL.
    """

    _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
    _TEST = {
        'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Any URL that ``R7IE`` already accepts is declined here, so the two
        extractors never both claim the same URL.
        """
        if R7IE.suitable(url):
            return False
        return super(R7ArticleIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the player video id on the article page and delegate to R7IE."""
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)

        # The id is taken either from id="player-<hex>" or from the id
        # attribute of a div whose class is exactly "embed".
        player_video_id = self._search_regex(
            r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
            page, 'video id')

        player_url = 'http://player.r7.com/video/i/%s' % player_video_id
        return self.url_result(player_url, R7IE.ie_key())
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user