l1ving_youtube-dl/youtube_dl/extractor/telecinco.py

# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor


class TelecincoIE(InfoExtractor):
    """Extractor for article pages on telecinco.es, cuatro.com and mediaset.es.

    Extraction walks a chain of JSON documents:

    1. the article page exposes a ``dataConfig`` path;
    2. the config document (same host as the page) carries the title and the
       URL of the ``mmc`` service;
    3. the ``mmc`` document lists ``locations``; the first one provides the
       gate URL (``gat``) together with the ``gcp``/``ogn`` tokens;
    4. POSTing those tokens to the gate URL returns the stream URL (and
       optionally a duration).
    """

    IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
    _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'

    _TESTS = [{
        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
        'info_dict': {
            'id': '1876350223',
            'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
            'ext': 'm3u8'
        }
    }, {
        'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
        'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
        'info_dict': {
            'id': 'jn24Od1zGLG4XUZcnUnZB6',
            'ext': 'mp4',
            'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
            'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
            'duration': 79,
        },
    }, {
        'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
        'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
        'info_dict': {
            'id': 'aywerkD2Sv1vGNqq9b85Q2',
            'ext': 'mp4',
            'title': '#DOYLACARA. Con la trata no hay trato',
            'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
            'duration': 50,
        },
    }, {
        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
        'only_matching': True,
    }, {
        'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
        'only_matching': True,
    }, {
        # ooyala video
        'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
        'only_matching': True,
    }]

    @staticmethod
    def _with_scheme(url, scheme):
        """Return *url* unchanged if it starts with ``http``, else prefix it with ``scheme:``."""
        if url.startswith('http'):
            return url
        return scheme + ':' + url

    def _real_extract(self, url):
        """Resolve the article page at *url* into an info dict.

        Returns a dict with ``id``, ``url`` (an m3u8 playlist URL), ``title``
        and ``duration`` (``None`` when the gate response has no duration).
        """
        # The page slug looks like ``<title>_<n>_<id>``. The title part may
        # itself contain underscores (e.g. ``Leo_Messi-..._2_2052780128``), so
        # take only the last token instead of unpacking a fixed-size split.
        video_id = self._match_id(url).rsplit('_', 1)[-1]

        # ``scheme://host`` of the page; the dataConfig path is resolved
        # against it. Anchored at the start so a later ``://`` in the URL
        # (e.g. inside a query string) cannot be mistaken for the host.
        # _match_id above has already validated the URL against _VALID_URL,
        # which guarantees this prefix is present.
        host = re.match(r'https?://[^/:]+', url).group(0)

        webpage = self._download_webpage(url, video_id)

        # _search_regex reports a missing match as an extraction error rather
        # than failing with AttributeError on a None match object.
        path = self._search_regex(
            r'dataConfig":"(?P<path>.*?)"', webpage,
            'data config path', group='path')
        config = self._download_json(host + path, video_id)
        title = config['info']['title']

        # The service URLs may be absolute or start without a scheme.
        mmc_url = self._with_scheme(config['services']['mmc'], 'http')
        mmc = self._download_json(mmc_url, video_id)

        # Only the first location is used; its index is echoed back to the
        # gate as ``sta``.
        sta = 0
        location = mmc['locations'][sta]
        gate_url = self._with_scheme(location['gat'], 'https')
        payload = {
            'sta': sta,
            'gcp': location['gcp'],
            'ogn': location['ogn'],
        }
        gate = self._download_json(
            gate_url, video_id,
            # json.dumps output is ASCII by default, so this yields bytes on
            # both Python 2 and Python 3.
            data=json.dumps(payload).encode('utf-8'),
            headers={'Content-Type': 'application/json'})

        # Swap the master playlist for the first audio+video variant playlist.
        m3u8_url = gate['stream'].split('/master.m3u8')[0] + '/index_0_av.m3u8?null=0'

        return {
            'id': video_id,
            'url': m3u8_url,
            'title': title,
            'duration': gate.get('duration'),
        }
PEP8 applied 2014-11-23 20:41:03 +01:00			`# coding: utf-8`
[telecinco] Add extractor (closes #4005) It uses the same extraction process as mitele.es, but with a few small differences. 2014-10-23 20:08:55 +02:00			`from __future__ import unicode_literals`

[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`import json`
			`import re`
[telecinco] Add extractor (closes #4005) It uses the same extraction process as mitele.es, but with a few small differences. 2014-10-23 20:08:55 +02:00
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`from .common import InfoExtractor`
[telecinco] Add extractor (closes #4005) It uses the same extraction process as mitele.es, but with a few small differences. 2014-10-23 20:08:55 +02:00
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00
			`class TelecincoIE(InfoExtractor):`
[telecinco] Fix typo 2015-09-16 22:49:26 +06:00			`IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'`
Improve some _VALID_URLs 2016-09-08 18:29:05 +07:00			`_VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es\|cuatro\.com\|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'`
[telecinco] Add extractor (closes #4005) It uses the same extraction process as mitele.es, but with a few small differences. 2014-10-23 20:08:55 +02:00
[telecinco] Recognize more urls (closes #5065) 2015-02-25 23:52:54 +01:00			`_TESTS = [{`
[telecinco] Add extractor (closes #4005) It uses the same extraction process as mitele.es, but with a few small differences. 2014-10-23 20:08:55 +02:00			`'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',`
			`'info_dict': {`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`'id': '1876350223',`
[telecinco] works again 2020-02-29 18:50:07 +01:00			`'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',`
			`'ext': 'm3u8'`
			`}`
[telecinco] Add support or cuatro.com 2015-09-16 22:37:01 +06:00			`}, {`
			`'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',`
[telecinco] Add support or cuatro.com 2015-09-16 22:37:01 +06:00			`'info_dict': {`
[telecinco] fix info extraction 2016-07-06 23:08:44 +01:00			`'id': 'jn24Od1zGLG4XUZcnUnZB6',`
[telecinco] Add support or cuatro.com 2015-09-16 22:37:01 +06:00			`'ext': 'mp4',`
[telecinco] fix info extraction 2016-07-06 23:08:44 +01:00			`'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',`
			`'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',`
[telecinco] Add support or cuatro.com 2015-09-16 22:37:01 +06:00			`'duration': 79,`
			`},`
[telecinco] Add support for mediaset.es 2015-09-16 22:45:39 +06:00			`}, {`
			`'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',`
[telecinco] Add support for mediaset.es 2015-09-16 22:45:39 +06:00			`'info_dict': {`
[telecinco] fix info extraction 2016-07-06 23:08:44 +01:00			`'id': 'aywerkD2Sv1vGNqq9b85Q2',`
[telecinco] Add support for mediaset.es 2015-09-16 22:45:39 +06:00			`'ext': 'mp4',`
			`'title': '#DOYLACARA. Con la trata no hay trato',`
[telecinco] fix info extraction 2016-07-06 23:08:44 +01:00			`'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',`
[telecinco] Add support for mediaset.es 2015-09-16 22:45:39 +06:00			`'duration': 50,`
			`},`
[telecinco] Recognize more urls (closes #5065) 2015-02-25 23:52:54 +01:00			`}, {`
			`'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',`
			`'only_matching': True,`
[telecinco] Relax _VALID_URL (Closes #6601) 2015-08-18 20:02:56 +06:00			`}, {`
			`'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',`
			`'only_matching': True,`
[telecinco] Add test for #12430 2017-03-13 22:41:28 +07:00			`}, {`
			`# ooyala video`
			`'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',`
			`'only_matching': True,`
[telecinco] Recognize more urls (closes #5065) 2015-02-25 23:52:54 +01:00			`}]`
[telecinco] Incorporate mitele code 2015-09-16 22:25:25 +06:00
[telecinco] works again 2020-02-29 18:50:07 +01:00			`def _real_extract(self, url):`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00
[telecinco] works again 2020-02-29 18:50:07 +01:00			`p = '(?P<host>:?[http\|https].://[^:/ ]+).?(?P<port>[0-9]).*'`
			`m = re.search(p, url)`
			`host = m.group('host')`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00
[telecinco] works again 2020-02-29 18:50:07 +01:00			`(url_title, _, video_id) = self._match_id(url).split('_')`
			`webpage = self._download_webpage(url, video_id)`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00
[telecinco] works again 2020-02-29 18:50:07 +01:00			`m = re.search(r'dataConfig":"(?P<path>.*?)"', webpage)`
			`path = m.group('path')`
			`final = self._download_json(host + path, video_id)`
			`title = final['info']['title']`
			`mmc = final['services']['mmc']`
			`if not mmc.startswith('http'):`
			`mmc = 'http:' + mmc`
			`res = self._download_json(mmc, video_id)`
			`sta = 0`
			`location = res['locations'][sta]`
			`gateurl = 'https:' + location['gat']`
			`gcp = location['gcp']`
			`ogn = location['ogn']`
			`payload = {'sta': sta, 'gcp': gcp, 'ogn': ogn}`
			`res = self._download_json(gateurl, video_id, data=str.encode(json.dumps(payload)), headers={'Content-Type': 'application/json'})`
			`duration = res.get('duration')`
			`m8u_url = res['stream'].split('/master.m3u8')[0] + '/index_0_av.m3u8?null=0'`

			`response = {`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`'id': video_id,`
[telecinco] works again 2020-02-29 18:50:07 +01:00			`'url': m8u_url,`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`'title': title,`
[telecinco] works again 2020-02-29 18:50:07 +01:00			`'duration': duration`
[telecinco] fix extraction(closes #17080) 2018-07-28 06:55:18 +01:00			`}`

[telecinco] works again 2020-02-29 18:50:07 +01:00			`return response`