[go90] Improve extraction
- add metadata for 'series', 'episode', 'season', 'season_id', 'season_number', and 'episode_number'
- integrate series title into the title
- extract subtitles (fall back to `vtt` if the subtitle file type detection fails, as that is the most likely extension)
This commit is contained in:
		
							parent
							
								
									3dfceb286c
								
							
						
					
					
						commit
						334f41e0d8
					
				| @ -5,6 +5,7 @@ import re | |||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|  |     determine_ext, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     parse_iso8601, |     parse_iso8601, | ||||||
| ) | ) | ||||||
| @ -18,7 +19,7 @@ class Go90IE(InfoExtractor): | |||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '84BUqjLpf9D', |             'id': '84BUqjLpf9D', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Inside The Utah Coalition Against Pornography Convention', |             'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention', | ||||||
|             'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.', |             'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.', | ||||||
|             'timestamp': 1491868800, |             'timestamp': 1491868800, | ||||||
|             'upload_date': '20170411', |             'upload_date': '20170411', | ||||||
| @ -32,11 +33,28 @@ class Go90IE(InfoExtractor): | |||||||
|             video_id, headers={ |             video_id, headers={ | ||||||
|                 'Content-Type': 'application/json; charset=utf-8', |                 'Content-Type': 'application/json; charset=utf-8', | ||||||
|             }, data=b'{"client":"web","device_type":"pc"}') |             }, data=b'{"client":"web","device_type":"pc"}') | ||||||
|         title = video_data['title'] |  | ||||||
|         main_video_asset = video_data['main_video_asset'] |         main_video_asset = video_data['main_video_asset'] | ||||||
| 
 | 
 | ||||||
|  |         episode_number = int_or_none(video_data.get('episode_number')) | ||||||
|  |         series = None | ||||||
|  |         season = None | ||||||
|  |         season_id = None | ||||||
|  |         season_number = None | ||||||
|  |         for metadata in video_data.get('__children', {}).get('Item', {}).values(): | ||||||
|  |             if metadata.get('type') == 'show': | ||||||
|  |                 series = metadata.get('title') | ||||||
|  |             elif metadata.get('type') == 'season': | ||||||
|  |                 season = metadata.get('title') | ||||||
|  |                 season_id = metadata.get('id') | ||||||
|  |                 season_number = int_or_none(metadata.get('season_number')) | ||||||
|  | 
 | ||||||
|  |         title = episode = video_data.get('title') or series | ||||||
|  |         if series and series != title: | ||||||
|  |             title = '%s - %s' % (series, title) | ||||||
|  | 
 | ||||||
|         thumbnails = [] |         thumbnails = [] | ||||||
|         formats = [] |         formats = [] | ||||||
|  |         subtitles = {} | ||||||
|         for asset in video_data.get('assets'): |         for asset in video_data.get('assets'): | ||||||
|             if asset.get('id') == main_video_asset: |             if asset.get('id') == main_video_asset: | ||||||
|                 for source in asset.get('sources', []): |                 for source in asset.get('sources', []): | ||||||
| @ -70,6 +88,15 @@ class Go90IE(InfoExtractor): | |||||||
|                             'height': int_or_none(source.get('height')), |                             'height': int_or_none(source.get('height')), | ||||||
|                             'tbr': int_or_none(source.get('bitrate')), |                             'tbr': int_or_none(source.get('bitrate')), | ||||||
|                         }) |                         }) | ||||||
|  | 
 | ||||||
|  |                 for caption in asset.get('caption_metadata', []): | ||||||
|  |                     caption_url = caption.get('source_url') | ||||||
|  |                     if not caption_url: | ||||||
|  |                         continue | ||||||
|  |                     subtitles.setdefault(caption.get('language', 'en'), []).append({ | ||||||
|  |                         'url': caption_url, | ||||||
|  |                         'ext': determine_ext(caption_url, 'vtt'), | ||||||
|  |                     }) | ||||||
|             elif asset.get('type') == 'image': |             elif asset.get('type') == 'image': | ||||||
|                 asset_location = asset.get('location') |                 asset_location = asset.get('location') | ||||||
|                 if not asset_location: |                 if not asset_location: | ||||||
| @ -89,4 +116,11 @@ class Go90IE(InfoExtractor): | |||||||
|             'description': video_data.get('short_description'), |             'description': video_data.get('short_description'), | ||||||
|             'like_count': int_or_none(video_data.get('like_count')), |             'like_count': int_or_none(video_data.get('like_count')), | ||||||
|             'timestamp': parse_iso8601(video_data.get('released_at')), |             'timestamp': parse_iso8601(video_data.get('released_at')), | ||||||
|  |             'series': series, | ||||||
|  |             'episode': episode, | ||||||
|  |             'season': season, | ||||||
|  |             'season_id': season_id, | ||||||
|  |             'season_number': season_number, | ||||||
|  |             'episode_number': episode_number, | ||||||
|  |             'subtitles': subtitles, | ||||||
|         } |         } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user