| 
									
										
										
										
											2016-10-02 13:39:18 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | from ..compat import ( | 
					
						
							| 
									
										
										
										
											2015-07-17 23:36:11 +06:00
										 |  |  |     compat_urllib_parse_unquote, | 
					
						
							| 
									
										
										
										
											2014-03-01 16:17:29 +01:00
										 |  |  |     compat_urllib_parse_urlparse, | 
					
						
							| 
									
										
										
										
											2014-12-13 12:24:42 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							| 
									
										
										
										
											2014-03-01 16:17:29 +01:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2015-01-01 20:01:55 +06:00
										 |  |  |     float_or_none, | 
					
						
							| 
									
										
										
										
											2015-11-21 22:18:17 +06:00
										 |  |  |     sanitized_Request, | 
					
						
							| 
									
										
										
										
											2017-04-08 19:42:09 +07:00
										 |  |  |     unescapeHTML, | 
					
						
							| 
									
										
										
										
											2016-03-26 02:19:24 +06:00
										 |  |  |     urlencode_postdata, | 
					
						
							| 
									
										
										
										
											2017-02-14 22:56:39 +07:00
										 |  |  |     USER_AGENTS, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  | class CeskaTelevizeIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2017-04-08 19:41:14 +07:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2017-02-14 15:57:17 +01:00
										 |  |  |             'id': '61924494877246241', | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2017-02-14 15:57:17 +01:00
										 |  |  |             'title': 'Hyde Park Civilizace: Život v Grónsku', | 
					
						
							|  |  |  |             'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg', | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |             'duration': 3350, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # m3u8 download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2016-05-09 20:37:20 +06:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '61924494877028507', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Hyde Park Civilizace: Bonus 01 - En', | 
					
						
							|  |  |  |             'description': 'English Subtittles', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'thumbnail': r're:^https?://.*\.jpg', | 
					
						
							| 
									
										
										
										
											2016-05-09 20:37:20 +06:00
										 |  |  |             'duration': 81.3, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # m3u8 download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2016-05-07 20:15:49 +02:00
										 |  |  |         # live stream | 
					
						
							|  |  |  |         'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2016-05-07 20:15:49 +02:00
										 |  |  |             'id': 402, | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2017-01-02 20:08:07 +08:00
										 |  |  |             'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | 
					
						
							| 
									
										
										
										
											2016-05-07 20:15:49 +02:00
										 |  |  |             'is_live': True, | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |         }, | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # m3u8 download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2016-05-09 20:37:20 +06:00
										 |  |  |         'skip': 'Georestricted to Czech Republic', | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2017-04-08 19:41:14 +07:00
										 |  |  |         'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2017-04-08 19:41:14 +07:00
										 |  |  |         playlist_id = self._match_id(url) | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |         webpage = self._download_webpage(url, playlist_id) | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-01 22:54:37 +07:00
										 |  |  |         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' | 
					
						
							|  |  |  |         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: | 
					
						
							|  |  |  |             raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | 
					
						
							| 
									
										
										
										
											2014-03-01 16:17:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-04-08 19:41:14 +07:00
										 |  |  |         type_ = None | 
					
						
							|  |  |  |         episode_id = None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         playlist = self._parse_json( | 
					
						
							|  |  |  |             self._search_regex( | 
					
						
							|  |  |  |                 r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist', | 
					
						
							|  |  |  |                 default='{}'), playlist_id) | 
					
						
							|  |  |  |         if playlist: | 
					
						
							|  |  |  |             type_ = playlist.get('type') | 
					
						
							|  |  |  |             episode_id = playlist.get('id') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not type_: | 
					
						
							|  |  |  |             type_ = self._html_search_regex( | 
					
						
							|  |  |  |                 r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', | 
					
						
							|  |  |  |                 webpage, 'type') | 
					
						
							|  |  |  |         if not episode_id: | 
					
						
							|  |  |  |             episode_id = self._html_search_regex( | 
					
						
							|  |  |  |                 r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', | 
					
						
							|  |  |  |                 webpage, 'episode_id') | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         data = { | 
					
						
							| 
									
										
										
										
											2017-04-08 19:41:14 +07:00
										 |  |  |             'playlist[0][type]': type_, | 
					
						
							| 
									
										
										
										
											2014-03-01 11:47:52 +01:00
										 |  |  |             'playlist[0][id]': episode_id, | 
					
						
							|  |  |  |             'requestUrl': compat_urllib_parse_urlparse(url).path, | 
					
						
							|  |  |  |             'requestSource': 'iVysilani', | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  |         entries = [] | 
					
						
							| 
									
										
										
										
											2017-02-14 22:56:39 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |         for user_agent in (None, USER_AGENTS['Safari']): | 
					
						
							|  |  |  |             req = sanitized_Request( | 
					
						
							|  |  |  |                 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', | 
					
						
							|  |  |  |                 data=urlencode_postdata(data)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             req.add_header('Content-type', 'application/x-www-form-urlencoded') | 
					
						
							|  |  |  |             req.add_header('x-addr', '127.0.0.1') | 
					
						
							|  |  |  |             req.add_header('X-Requested-With', 'XMLHttpRequest') | 
					
						
							|  |  |  |             if user_agent: | 
					
						
							|  |  |  |                 req.add_header('User-Agent', user_agent) | 
					
						
							|  |  |  |             req.add_header('Referer', url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             playlistpage = self._download_json(req, playlist_id, fatal=False) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if not playlistpage: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             playlist_url = playlistpage['url'] | 
					
						
							|  |  |  |             if playlist_url == 'error_region': | 
					
						
							|  |  |  |                 raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) | 
					
						
							|  |  |  |             req.add_header('Referer', url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             playlist_title = self._og_search_title(webpage, default=None) | 
					
						
							|  |  |  |             playlist_description = self._og_search_description(webpage, default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             playlist = self._download_json(req, playlist_id, fatal=False) | 
					
						
							|  |  |  |             if not playlist: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             playlist = playlist.get('playlist') | 
					
						
							|  |  |  |             if not isinstance(playlist, list): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             playlist_len = len(playlist) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             for num, item in enumerate(playlist): | 
					
						
							|  |  |  |                 is_live = item.get('type') == 'LIVE' | 
					
						
							|  |  |  |                 formats = [] | 
					
						
							|  |  |  |                 for format_id, stream_url in item.get('streamUrls', {}).items(): | 
					
						
							|  |  |  |                     if 'playerType=flash' in stream_url: | 
					
						
							| 
									
										
										
										
											2017-02-16 00:04:15 +07:00
										 |  |  |                         stream_formats = self._extract_m3u8_formats( | 
					
						
							| 
									
										
										
										
											2017-03-25 19:37:54 +01:00
										 |  |  |                             stream_url, playlist_id, 'mp4', 'm3u8_native', | 
					
						
							| 
									
										
										
										
											2017-02-16 00:04:15 +07:00
										 |  |  |                             m3u8_id='hls-%s' % format_id, fatal=False) | 
					
						
							| 
									
										
										
										
											2017-02-14 22:56:39 +07:00
										 |  |  |                     else: | 
					
						
							| 
									
										
										
										
											2017-02-16 00:04:15 +07:00
										 |  |  |                         stream_formats = self._extract_mpd_formats( | 
					
						
							|  |  |  |                             stream_url, playlist_id, | 
					
						
							|  |  |  |                             mpd_id='dash-%s' % format_id, fatal=False) | 
					
						
							|  |  |  |                     # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031 | 
					
						
							|  |  |  |                     if format_id == 'audioDescription': | 
					
						
							|  |  |  |                         for f in stream_formats: | 
					
						
							|  |  |  |                             f['source_preference'] = -10 | 
					
						
							|  |  |  |                     formats.extend(stream_formats) | 
					
						
							| 
									
										
										
										
											2017-02-14 22:56:39 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 if user_agent and len(entries) == playlist_len: | 
					
						
							|  |  |  |                     entries[num]['formats'].extend(formats) | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 item_id = item.get('id') or item['assetId'] | 
					
						
							|  |  |  |                 title = item['title'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 duration = float_or_none(item.get('duration')) | 
					
						
							|  |  |  |                 thumbnail = item.get('previewImageUrl') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 subtitles = {} | 
					
						
							|  |  |  |                 if item.get('type') == 'VOD': | 
					
						
							|  |  |  |                     subs = item.get('subtitles') | 
					
						
							|  |  |  |                     if subs: | 
					
						
							|  |  |  |                         subtitles = self.extract_subtitles(episode_id, subs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if playlist_len == 1: | 
					
						
							|  |  |  |                     final_title = playlist_title or title | 
					
						
							|  |  |  |                     if is_live: | 
					
						
							|  |  |  |                         final_title = self._live_title(final_title) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     final_title = '%s (%s)' % (playlist_title, title) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 entries.append({ | 
					
						
							|  |  |  |                     'id': item_id, | 
					
						
							|  |  |  |                     'title': final_title, | 
					
						
							|  |  |  |                     'description': playlist_description if playlist_len == 1 else None, | 
					
						
							|  |  |  |                     'thumbnail': thumbnail, | 
					
						
							|  |  |  |                     'duration': duration, | 
					
						
							|  |  |  |                     'formats': formats, | 
					
						
							|  |  |  |                     'subtitles': subtitles, | 
					
						
							|  |  |  |                     'is_live': is_live, | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for e in entries: | 
					
						
							|  |  |  |             self._sort_formats(e['formats']) | 
					
						
							| 
									
										
										
										
											2015-09-07 04:52:26 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-17 21:17:47 +01:00
										 |  |  |     def _get_subtitles(self, episode_id, subs): | 
					
						
							|  |  |  |         original_subtitles = self._download_webpage( | 
					
						
							|  |  |  |             subs[0]['url'], episode_id, 'Downloading subtitles') | 
					
						
							|  |  |  |         srt_subs = self._fix_subtitles(original_subtitles) | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'cs': [{ | 
					
						
							|  |  |  |                 'ext': 'srt', | 
					
						
							|  |  |  |                 'data': srt_subs, | 
					
						
							|  |  |  |             }] | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:14 +06:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _fix_subtitles(subtitles): | 
					
						
							|  |  |  |         """ Convert millisecond-based subtitles to SRT """ | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def _msectotimecode(msec): | 
					
						
							| 
									
										
										
										
											2015-01-07 05:03:14 +06:00
										 |  |  |             """ Helper utility to convert milliseconds to timecode """ | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |             components = [] | 
					
						
							|  |  |  |             for divider in [1000, 60, 60, 100]: | 
					
						
							|  |  |  |                 components.append(msec % divider) | 
					
						
							|  |  |  |                 msec //= divider | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |             return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def _fix_subtitle(subtitle): | 
					
						
							|  |  |  |             for line in subtitle.splitlines(): | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |                 m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |                 if m: | 
					
						
							|  |  |  |                     yield m.group(1) | 
					
						
							|  |  |  |                     start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |                     yield '{0} --> {1}'.format(start, stop) | 
					
						
							| 
									
										
										
										
											2015-01-02 17:12:20 +01:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     yield line | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-14 15:37:17 +06:00
										 |  |  |         return '\r\n'.join(_fix_subtitle(subtitles)) | 
					
						
							| 
									
										
										
										
											2017-04-08 19:42:09 +07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class CeskaTelevizePoradyIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' | 
					
						
							|  |  |  |     _TESTS = [{ | 
					
						
							|  |  |  |         # video with 18+ caution trailer | 
					
						
							|  |  |  |         'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '215562210900007-bogotart', | 
					
						
							|  |  |  |             'title': 'Queer: Bogotart', | 
					
						
							|  |  |  |             'description': 'Alternativní průvodce současným queer světem', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist': [{ | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': '61924494876844842', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							|  |  |  |                 'title': 'Queer: Bogotart (Varování 18+)', | 
					
						
							|  |  |  |                 'duration': 10.2, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         }, { | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': '61924494877068022', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							|  |  |  |                 'title': 'Queer: Bogotart (Queer)', | 
					
						
							|  |  |  |                 'thumbnail': r're:^https?://.*\.jpg', | 
					
						
							|  |  |  |                 'duration': 1558.3, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         }], | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             # m3u8 download | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     }] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         data_url = unescapeHTML(self._search_regex( | 
					
						
							|  |  |  |             r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1', | 
					
						
							|  |  |  |             webpage, 'iframe player url', group='url')) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) |