| 
									
										
										
										
											2015-02-09 16:05:01 +01:00
										 |  |  | # coding: utf-8 | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-28 21:25:04 +06:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  | from .common import InfoExtractor | 
					
						
							|  |  |  | from ..utils import ( | 
					
						
							|  |  |  |     determine_ext, | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |     dict_get, | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |     int_or_none, | 
					
						
							|  |  |  |     try_get, | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  | class SVTBaseIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |     def _extract_video(self, video_info, video_id): | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |         formats = [] | 
					
						
							|  |  |  |         for vr in video_info['videoReferences']: | 
					
						
							| 
									
										
										
										
											2016-03-15 19:33:09 +01:00
										 |  |  |             player_type = vr.get('playerType') | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |             vurl = vr['url'] | 
					
						
							| 
									
										
										
										
											2015-02-28 21:25:04 +06:00
										 |  |  |             ext = determine_ext(vurl) | 
					
						
							|  |  |  |             if ext == 'm3u8': | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |                 formats.extend(self._extract_m3u8_formats( | 
					
						
							|  |  |  |                     vurl, video_id, | 
					
						
							|  |  |  |                     ext='mp4', entry_protocol='m3u8_native', | 
					
						
							| 
									
										
										
										
											2016-03-15 19:33:09 +01:00
										 |  |  |                     m3u8_id=player_type, fatal=False)) | 
					
						
							| 
									
										
										
										
											2015-02-28 21:25:04 +06:00
										 |  |  |             elif ext == 'f4m': | 
					
						
							|  |  |  |                 formats.extend(self._extract_f4m_formats( | 
					
						
							|  |  |  |                     vurl + '?hdcore=3.3.0', video_id, | 
					
						
							| 
									
										
										
										
											2016-03-15 19:33:09 +01:00
										 |  |  |                     f4m_id=player_type, fatal=False)) | 
					
						
							|  |  |  |             elif ext == 'mpd': | 
					
						
							|  |  |  |                 if player_type == 'dashhbbtv': | 
					
						
							|  |  |  |                     formats.extend(self._extract_mpd_formats( | 
					
						
							|  |  |  |                         vurl, video_id, mpd_id=player_type, fatal=False)) | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 formats.append({ | 
					
						
							| 
									
										
										
										
											2016-03-15 19:33:09 +01:00
										 |  |  |                     'format_id': player_type, | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |                     'url': vurl, | 
					
						
							|  |  |  |                 }) | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | 
					
						
							|  |  |  |             self.raise_geo_restricted('This video is only available in Sweden') | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-12 17:08:55 +01:00
										 |  |  |         subtitles = {} | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |         subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences')) | 
					
						
							| 
									
										
										
										
											2016-01-23 01:47:54 +06:00
										 |  |  |         if isinstance(subtitle_references, list): | 
					
						
							|  |  |  |             for sr in subtitle_references: | 
					
						
							|  |  |  |                 subtitle_url = sr.get('url') | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |                 subtitle_lang = sr.get('language', 'sv') | 
					
						
							| 
									
										
										
										
											2016-01-23 01:47:54 +06:00
										 |  |  |                 if subtitle_url: | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |                     if determine_ext(subtitle_url) == 'm3u8': | 
					
						
							|  |  |  |                         # TODO(yan12125): handle WebVTT in m3u8 manifests | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) | 
					
						
							| 
									
										
										
										
											2016-01-12 17:08:55 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |         title = video_info.get('title') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         series = video_info.get('programTitle') | 
					
						
							|  |  |  |         season_number = int_or_none(video_info.get('season')) | 
					
						
							|  |  |  |         episode = video_info.get('episodeTitle') | 
					
						
							|  |  |  |         episode_number = int_or_none(video_info.get('episodeNumber')) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) | 
					
						
							|  |  |  |         age_limit = None | 
					
						
							|  |  |  |         adult = dict_get( | 
					
						
							|  |  |  |             video_info, ('inappropriateForChildren', 'blockedForChildren'), | 
					
						
							|  |  |  |             skip_false_values=False) | 
					
						
							|  |  |  |         if adult is not None: | 
					
						
							|  |  |  |             age_limit = 18 if adult else 0 | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2016-01-12 17:08:55 +01:00
										 |  |  |             'subtitles': subtitles, | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |             'duration': duration, | 
					
						
							| 
									
										
										
										
											2015-02-28 21:25:04 +06:00
										 |  |  |             'age_limit': age_limit, | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |             'series': series, | 
					
						
							|  |  |  |             'season_number': season_number, | 
					
						
							|  |  |  |             'episode': episode, | 
					
						
							|  |  |  |             'episode_number': episode_number, | 
					
						
							| 
									
										
										
										
											2015-02-09 15:56:46 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SVTIE(SVTBaseIE): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' | 
					
						
							|  |  |  |     _TEST = { | 
					
						
							|  |  |  |         'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |         'md5': '33e9a5d8f646523ce0868ecfb0eed77d', | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '2900353', | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Stjärnorna skojar till det - under SVT-intervjun', | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |             'duration': 27, | 
					
						
							|  |  |  |             'age_limit': 0, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-09 00:23:35 +06:00
										 |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _extract_url(webpage): | 
					
						
							|  |  |  |         mobj = re.search( | 
					
						
							|  |  |  |             r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage) | 
					
						
							|  |  |  |         if mobj: | 
					
						
							|  |  |  |             return mobj.group('url') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         widget_id = mobj.group('widget_id') | 
					
						
							|  |  |  |         article_id = mobj.group('id') | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         info = self._download_json( | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), | 
					
						
							|  |  |  |             article_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |         info_dict = self._extract_video(info['video'], article_id) | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |         info_dict['title'] = info['context']['title'] | 
					
						
							|  |  |  |         return info_dict | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  | 
 | 
					
						
							|  |  |  | class SVTPlayIE(SVTBaseIE): | 
					
						
							|  |  |  |     IE_DESC = 'SVT Play and Öppet arkiv' | 
					
						
							| 
									
										
										
										
											2016-06-26 00:29:53 +07:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)' | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2016-01-23 01:47:54 +06:00
										 |  |  |         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', | 
					
						
							|  |  |  |         'md5': '2b6704fe4a28801e1a098bbf3c5ac611', | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2016-01-23 01:47:54 +06:00
										 |  |  |             'id': '5996901', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'Flygplan till Haile Selassie', | 
					
						
							|  |  |  |             'duration': 3527, | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |             'thumbnail': 're:^https?://.*[\.-]jpg$', | 
					
						
							|  |  |  |             'age_limit': 0, | 
					
						
							| 
									
										
										
										
											2016-01-23 01:47:54 +06:00
										 |  |  |             'subtitles': { | 
					
						
							|  |  |  |                 'sv': [{ | 
					
						
							|  |  |  |                     'ext': 'wsrt', | 
					
						
							|  |  |  |                 }] | 
					
						
							|  |  |  |             }, | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         # geo restricted to Sweden | 
					
						
							|  |  |  |         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-06-26 00:29:53 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-09 00:12:42 +06:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  |         video_id = self._match_id(url) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, video_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |         data = self._parse_json( | 
					
						
							|  |  |  |             self._search_regex( | 
					
						
							|  |  |  |                 r'root\["__svtplay"\]\s*=\s*([^;]+);', | 
					
						
							|  |  |  |                 webpage, 'embedded data', default='{}'), | 
					
						
							|  |  |  |             video_id, fatal=False) | 
					
						
							| 
									
										
										
										
											2016-06-21 17:55:53 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         thumbnail = self._og_search_thumbnail(webpage) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-06-22 23:36:07 +07:00
										 |  |  |         if data: | 
					
						
							|  |  |  |             video_info = try_get( | 
					
						
							|  |  |  |                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], | 
					
						
							|  |  |  |                 dict) | 
					
						
							|  |  |  |             if video_info: | 
					
						
							|  |  |  |                 info_dict = self._extract_video(video_info, video_id) | 
					
						
							|  |  |  |                 info_dict.update({ | 
					
						
							|  |  |  |                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'], | 
					
						
							|  |  |  |                     'thumbnail': thumbnail, | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  |                 return info_dict | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         video_id = self._search_regex( | 
					
						
							|  |  |  |             r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', | 
					
						
							|  |  |  |             webpage, 'video id', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if video_id: | 
					
						
							|  |  |  |             data = self._download_json( | 
					
						
							|  |  |  |                 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) | 
					
						
							|  |  |  |             info_dict = self._extract_video(data, video_id) | 
					
						
							|  |  |  |             if not info_dict.get('title'): | 
					
						
							|  |  |  |                 info_dict['title'] = re.sub( | 
					
						
							|  |  |  |                     r'\s*\|\s*.+?$', '', | 
					
						
							|  |  |  |                     info_dict.get('episode') or self._og_search_title(webpage)) | 
					
						
							|  |  |  |             return info_dict |