| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | # -*- coding: utf-8 -*- | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import ( | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |     compat_urllib_request, | 
					
						
							|  |  |  |     compat_urllib_parse, | 
					
						
							| 
									
										
										
										
											2015-07-17 23:36:11 +06:00
										 |  |  |     compat_urllib_parse_unquote, | 
					
						
							| 
									
										
										
										
											2014-03-01 16:17:29 +01:00
										 |  |  |     compat_urllib_parse_urlparse, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2014-03-01 16:17:29 +01:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |     float_or_none, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  | class CeskaTelevizeIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |     _TESTS = [ | 
					
						
							|  |  |  |         { | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |             'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |             'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |                 'id': '214411058091220', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							|  |  |  |                 'title': 'Hyde Park Civilizace', | 
					
						
							|  |  |  |                 'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře', | 
					
						
							|  |  |  |                 'thumbnail': 're:^https?://.*\.jpg', | 
					
						
							|  |  |  |                 'duration': 3350, | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |             }, | 
					
						
							|  |  |  |             'params': { | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |                 # m3u8 download | 
					
						
							|  |  |  |                 'skip_download': True, | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |         { | 
					
						
							|  |  |  |             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': '14716', | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |                 'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |                 'title': 'První republika: Zpěvačka z Dupárny Bobina', | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |                 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', | 
					
						
							|  |  |  |                 'thumbnail': 're:^https?://.*\.jpg', | 
					
						
							|  |  |  |                 'duration': 88.4, | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |             }, | 
					
						
							|  |  |  |             'params': { | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |                 # m3u8 download | 
					
						
							|  |  |  |                 'skip_download': True, | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |     ] | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         url = url.replace('/porady/', '/ivysilani/').replace('/video/', '') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | 
					
						
							|  |  |  |         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | 
					
						
							|  |  |  |             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | 
					
						
							| 
									
										
										
										
											2014-03-01 16:17:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type') | 
					
						
							|  |  |  |         episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         data = { | 
					
						
							|  |  |  |             'playlist[0][type]': typ, | 
					
						
							|  |  |  |             'playlist[0][id]': episode_id, | 
					
						
							|  |  |  |             'requestUrl': compat_urllib_parse_urlparse(url).path, | 
					
						
							|  |  |  |             'requestSource': 'iVysilani', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |         req = compat_urllib_request.Request( | 
					
						
							|  |  |  |             'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', | 
					
						
							|  |  |  |             data=compat_urllib_parse.urlencode(data)) | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         req.add_header('Content-type', 'application/x-www-form-urlencoded') | 
					
						
							|  |  |  |         req.add_header('x-addr', '127.0.0.1') | 
					
						
							|  |  |  |         req.add_header('X-Requested-With', 'XMLHttpRequest') | 
					
						
							|  |  |  |         req.add_header('Referer', url) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-01 23:05:33 +07:00
										 |  |  |         playlistpage = self._download_json(req, video_id) | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |         playlist_url = playlistpage['url'] | 
					
						
							|  |  |  |         if playlist_url == 'error_region': | 
					
						
							|  |  |  |             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-07-17 23:36:11 +06:00
										 |  |  |         req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url)) | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         req.add_header('Referer', url) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |         playlist = self._download_json(req, video_id) | 
					
						
							| 
									
										
										
										
											2014-11-23 20:41:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |         item = playlist['playlist'][0] | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |         for format_id, stream_url in item['streamUrls'].items(): | 
					
						
							|  |  |  |             formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4')) | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |         title = self._og_search_title(webpage) | 
					
						
							|  |  |  |         description = self._og_search_description(webpage) | 
					
						
							|  |  |  |         duration = float_or_none(item.get('duration')) | 
					
						
							|  |  |  |         thumbnail = item.get('previewImageUrl') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:14 +06:00
										 |  |  |         subtitles = {} | 
					
						
							|  |  |  |         subs = item.get('subtitles') | 
					
						
							|  |  |  |         if subs: | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  |             subtitles = self.extract_subtitles(episode_id, subs) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': episode_id, | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': description, | 
					
						
							|  |  |  |             'thumbnail': thumbnail, | 
					
						
							|  |  |  |             'duration': duration, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |             'subtitles': subtitles, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  |     def _get_subtitles(self, episode_id, subs): | 
					
						
							|  |  |  |         original_subtitles = self._download_webpage( | 
					
						
							|  |  |  |             subs[0]['url'], episode_id, 'Downloading subtitles') | 
					
						
							|  |  |  |         srt_subs = self._fix_subtitles(original_subtitles) | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'cs': [{ | 
					
						
							|  |  |  |                 'ext': 'srt', | 
					
						
							|  |  |  |                 'data': srt_subs, | 
					
						
							|  |  |  |             }] | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:14 +06:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _fix_subtitles(subtitles): | 
					
						
							|  |  |  |         """ Convert millisecond-based subtitles to SRT """ | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def _msectotimecode(msec): | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:14 +06:00
										 |  |  |             """ Helper utility to convert milliseconds to timecode """ | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |             components = [] | 
					
						
							|  |  |  |             for divider in [1000, 60, 60, 100]: | 
					
						
							|  |  |  |                 components.append(msec % divider) | 
					
						
							|  |  |  |                 msec //= divider | 
					
						
							|  |  |  |             return "{3:02}:{2:02}:{1:02},{0:03}".format(*components) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def _fix_subtitle(subtitle): | 
					
						
							|  |  |  |             for line in subtitle.splitlines(): | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:14 +06:00
										 |  |  |                 m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |                 if m: | 
					
						
							|  |  |  |                     yield m.group(1) | 
					
						
							|  |  |  |                     start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:34 +06:00
										 |  |  |                     yield "{0} --> {1}".format(start, stop) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     yield line | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  |         return "\r\n".join(_fix_subtitle(subtitles)) |