| 
									
										
										
										
											2016-10-02 13:39:18 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  | import base64 | 
					
						
							|  |  |  | import hashlib | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  | from ..aes import aes_cbc_decrypt | 
					
						
							| 
									
										
										
										
											2016-04-22 11:26:43 +01:00
										 |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |     bytes_to_intlist, | 
					
						
							| 
									
										
										
										
											2016-04-22 11:26:43 +01:00
										 |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |     intlist_to_bytes, | 
					
						
							|  |  |  |     parse_codecs, | 
					
						
							|  |  |  |     parse_duration, | 
					
						
							| 
									
										
										
										
											2016-04-22 11:26:43 +01:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class NewstubeIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)' | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							| 
									
										
										
										
											2014-07-06 19:32:13 +07:00
										 |  |  |         'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |         'md5': '9d10320ad473444352f72f746ccb8b8c', | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2014-07-06 19:32:13 +07:00
										 |  |  |             'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', | 
					
						
							| 
									
										
										
										
											2016-04-22 11:26:43 +01:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2014-07-06 19:32:13 +07:00
										 |  |  |             'title': 'Телеканал CNN переместил город Славянск в Крым', | 
					
						
							|  |  |  |             'description': 'md5:419a8c9f03442bc0b0a794d689360335', | 
					
						
							|  |  |  |             'duration': 31.05, | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |         page = self._download_webpage(url, video_id) | 
					
						
							|  |  |  |         title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True) | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         video_guid = self._html_search_regex( | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |             r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |             page, 'video GUID') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |         enc_data = base64.b64decode(self._download_webpage( | 
					
						
							|  |  |  |             'https://www.newstube.ru/embed/api/player/getsources2', | 
					
						
							|  |  |  |             video_guid, query={ | 
					
						
							|  |  |  |                 'guid': video_guid, | 
					
						
							|  |  |  |                 'ff': 3, | 
					
						
							|  |  |  |             })) | 
					
						
							|  |  |  |         key = hashlib.pbkdf2_hmac( | 
					
						
							|  |  |  |             'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16] | 
					
						
							|  |  |  |         dec_data = aes_cbc_decrypt( | 
					
						
							|  |  |  |             bytes_to_intlist(enc_data[32:]), bytes_to_intlist(key), | 
					
						
							|  |  |  |             bytes_to_intlist(enc_data[16:32])) | 
					
						
							|  |  |  |         sources = self._parse_json(intlist_to_bytes(dec_data[:-dec_data[-1]]), video_guid) | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |         for source in sources: | 
					
						
							|  |  |  |             source_url = source.get('Src') | 
					
						
							|  |  |  |             if not source_url: | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |             height = int_or_none(source.get('Height')) | 
					
						
							|  |  |  |             f = { | 
					
						
							|  |  |  |                 'format_id': 'http' + ('-%dp' % height if height else ''), | 
					
						
							|  |  |  |                 'url': source_url, | 
					
						
							|  |  |  |                 'width': int_or_none(source.get('Width')), | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |                 'height': height, | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |             source_type = source.get('Type') | 
					
						
							|  |  |  |             if source_type: | 
					
						
							|  |  |  |                 f.update(parse_codecs(self._search_regex( | 
					
						
							|  |  |  |                     r'codecs="([^"]+)"', source_type, 'codecs', fatal=False))) | 
					
						
							|  |  |  |             formats.append(f) | 
					
						
							| 
									
										
										
										
											2016-04-22 11:26:43 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._check_formats(formats, video_guid) | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_guid, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2019-04-03 10:19:36 +01:00
										 |  |  |             'description': self._html_search_meta(['description', 'og:description'], page), | 
					
						
							|  |  |  |             'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page), | 
					
						
							|  |  |  |             'duration': parse_duration(self._html_search_meta('duration', page)), | 
					
						
							| 
									
										
										
										
											2014-05-01 21:15:25 +07:00
										 |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2014-11-23 20:41:03 +01:00
										 |  |  |         } |