| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							|  |  |  |     parse_iso8601, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  | class DRBonanzaIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '65517', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Talkshowet - Leonard Cohen', | 
					
						
							|  |  |  |             'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |             'timestamp': 1295537932, | 
					
						
							|  |  |  |             'upload_date': '20110120', | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |             'duration': 3664, | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-06-24 16:00:12 +08:00
										 |  |  |         'params': { | 
					
						
							|  |  |  |             'skip_download': True,  # requires rtmp | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410', | 
					
						
							|  |  |  |         'md5': '6dfe039417e76795fb783c52da3de11d', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '59410', | 
					
						
							|  |  |  |             'ext': 'mp3', | 
					
						
							|  |  |  |             'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission', | 
					
						
							|  |  |  |             'description': 'md5:501e5a195749480552e214fbbed16c4e', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |             'timestamp': 1223274900, | 
					
						
							|  |  |  |             'upload_date': '20081006', | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |             'duration': 7369, | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         }, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         url_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |         webpage = self._download_webpage(url, url_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         if url_id: | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |             info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json')) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         else: | 
					
						
							|  |  |  |             # Just fetch the first video on that page | 
					
						
							|  |  |  |             info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json')) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         asset_id = str(info['AssetId']) | 
					
						
							|  |  |  |         title = info['Title'].rstrip(' \'\"-,.:;!?') | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |         duration = int_or_none(info.get('Duration'), scale=1000) | 
					
						
							|  |  |  |         # First published online. "FirstPublished" contains the date for original airing. | 
					
						
							|  |  |  |         timestamp = parse_iso8601( | 
					
						
							|  |  |  |             re.sub(r'\.\d+$', '', info['Created'])) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         def parse_filename_info(url): | 
					
						
							|  |  |  |             match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) | 
					
						
							|  |  |  |             if match: | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |                 return { | 
					
						
							|  |  |  |                     'width': int(match.group('width')), | 
					
						
							|  |  |  |                     'height': int(match.group('height')), | 
					
						
							|  |  |  |                     'vbr': int(match.group('bitrate')), | 
					
						
							|  |  |  |                     'ext': match.group('ext') | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |             match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url) | 
					
						
							|  |  |  |             if match: | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |                 return { | 
					
						
							|  |  |  |                     'vbr': int(match.group('bitrate')), | 
					
						
							|  |  |  |                     'ext': match.group(2) | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             return {} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         video_types = ['VideoHigh', 'VideoMid', 'VideoLow'] | 
					
						
							|  |  |  |         preferencemap = { | 
					
						
							|  |  |  |             'VideoHigh': -1, | 
					
						
							|  |  |  |             'VideoMid': -2, | 
					
						
							|  |  |  |             'VideoLow': -3, | 
					
						
							|  |  |  |             'Audio': -4, | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         formats = [] | 
					
						
							|  |  |  |         for file in info['Files']: | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |             if info['Type'] == 'Video': | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |                 if file['Type'] in video_types: | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |                     format = parse_filename_info(file['Location']) | 
					
						
							|  |  |  |                     format.update({ | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |                         'url': file['Location'], | 
					
						
							|  |  |  |                         'format_id': file['Type'].replace('Video', ''), | 
					
						
							|  |  |  |                         'preference': preferencemap.get(file['Type'], -10), | 
					
						
							|  |  |  |                     }) | 
					
						
							| 
									
										
										
										
											2015-06-24 16:00:12 +08:00
										 |  |  |                     if format['url'].startswith('rtmp'): | 
					
						
							|  |  |  |                         rtmp_url = format['url'] | 
					
						
							|  |  |  |                         format['rtmp_live'] = True  # --resume does not work | 
					
						
							|  |  |  |                         if '/bonanza/' in rtmp_url: | 
					
						
							|  |  |  |                             format['play_path'] = rtmp_url.split('/bonanza/')[1] | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |                     formats.append(format) | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |                 elif file['Type'] == 'Thumb': | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |                     thumbnail = file['Location'] | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |             elif info['Type'] == 'Audio': | 
					
						
							|  |  |  |                 if file['Type'] == 'Audio': | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |                     format = parse_filename_info(file['Location']) | 
					
						
							|  |  |  |                     format.update({ | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |                         'url': file['Location'], | 
					
						
							|  |  |  |                         'format_id': file['Type'], | 
					
						
							|  |  |  |                         'vcodec': 'none', | 
					
						
							|  |  |  |                     }) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  |                     formats.append(format) | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |                 elif file['Type'] == 'Thumb': | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |                     thumbnail = file['Location'] | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         description = '%s\n%s\n%s\n' % ( | 
					
						
							|  |  |  |             info['Description'], info['Actors'], info['Colophon']) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id | 
					
						
							|  |  |  |         display_id = re.sub(r'-+', '-', display_id) | 
					
						
							| 
									
										
										
										
											2015-01-11 23:40:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-11 23:23:10 +01:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': asset_id, | 
					
						
							|  |  |  |             'display_id': display_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							|  |  |  |             'formats': formats, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'timestamp': timestamp, | 
					
						
							|  |  |  |             'duration': duration, | 
					
						
							|  |  |  |         } |