2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# coding: utf-8
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-05 19:02:03 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from __future__ import unicode_literals
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-02 20:22:17 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import re
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from .common import InfoExtractor
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from ..utils import (
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    int_or_none,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    js_to_json,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    urlencode_postdata,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    extract_attributes,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    smuggle_url,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								)
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class TouTvIE(InfoExtractor):
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _NETRC_MACHINE = 'toutv'
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-05 19:02:03 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    IE_NAME = 'tou.tv'
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-12 11:11:44 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _access_token = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    _claims = None
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-11-05 03:24:42 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    _TESTS = [{
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-05 19:02:03 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        'info_dict': {
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            'id': '122017',
							 | 
						
					
						
							
								
									
										
										
										
											2015-02-01 15:01:33 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            'ext': 'mp4',
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            'title': 'Saison 2015 Épisode 17',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'description': 'La photo de famille 2',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'upload_date': '20100717',
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        },
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-05 19:02:03 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        'params': {
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            # m3u8 download
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'skip_download': True,
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        },
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        'skip': '404 Not Found',
							 | 
						
					
						
							
								
									
										
										
										
											2016-11-05 03:24:42 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    }, {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        'url': 'http://ici.tou.tv/hackers',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        'only_matching': True,
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-12 11:11:44 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    }, {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        'only_matching': True,
							 | 
						
					
						
							
								
									
										
										
										
											2016-11-05 03:24:42 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    }]
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def _real_initialize(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        email, password = self._get_login_info()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if email is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-02 20:22:17 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        state = 'http://ici.tou.tv/'
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        webpage = self._download_webpage(state, None, 'Downloading homepage')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        toutvlogin = self._parse_json(self._search_regex(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        login_webpage = self._download_webpage(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            authorize_url, None, 'Downloading login page', query={
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'client_id': toutvlogin['clientId'],
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'response_type': 'token',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'state': state,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            })
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-02 20:22:17 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            form, form_elem = re.search(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            form_data = self._hidden_inputs(form)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            form_url = extract_attributes(form_elem).get('action') or default_form_url
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return form_url, form_data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        post_url, form_data = extract_form_url_and_data(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            login_webpage,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'https://services.radio-canada.ca/auth/oauth/v2/authorize/login',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            r'(?:id|name)="Form-login"')
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        form_data.update({
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'login-email': email,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'login-password': password,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        })
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-02 20:22:17 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        consent_webpage = self._download_webpage(
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            post_url, None, 'Logging in', data=urlencode_postdata(form_data))
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-02 20:22:17 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        post_url, form_data = extract_form_url_and_data(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            consent_webpage,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        _, urlh = self._download_webpage_handle(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            post_url, None, 'Following Redirection',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            data=urlencode_postdata(form_data))
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self._access_token = self._search_regex(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            urlh.geturl(), 'access token')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self._claims = self._download_json(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'https://services.radio-canada.ca/media/validation/v2/getClaims',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            None, 'Extracting Claims', query={
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'token': self._access_token,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'access_token': self._access_token,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            })['claims']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _real_extract(self, url):
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        path = self._match_id(url)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
							 | 
						
					
						
							
								
									
										
										
										
											2017-08-23 22:28:09 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # IsDrm does not necessarily mean the video is DRM protected (see
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # https://github.com/rg3/youtube-dl/issues/13994).
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if metadata.get('IsDrm'):
							 | 
						
					
						
							
								
									
										
										
										
											2017-08-23 22:28:09 +07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.report_warning('This video is probably DRM protected.', path)
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        video_id = metadata['IdMedia']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        details = metadata['Details']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        title = details['OriginalTitle']
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if self._access_token and self._claims:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            video_url = smuggle_url(video_url, {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'access_token': self._access_token,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'claims': self._claims,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            })
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return {
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            '_type': 'url_transparent',
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-16 17:36:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            'url': video_url,
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'id': video_id,
							 | 
						
					
						
							
								
									
										
										
										
											2016-05-24 16:06:02 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            'title': title,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'thumbnail': details.get('ImageUrl'),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'duration': int_or_none(details.get('LengthInSeconds')),
							 | 
						
					
						
							
								
									
										
										
										
											2013-11-20 06:13:19 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 |