l1ving_youtube-dl/youtube_dl/extractor/memri.py

# coding: utf-8
from __future__ import unicode_literals

import re
from ..utils import (
    ExtractorError,
    unescapeHTML,
    js_to_json,
)
from .common import InfoExtractor


class MemriIE(InfoExtractor):
    """Extractor for video clips published on memritv.org / memri.org.

    Two URL shapes are accepted: clip pages (``/clip/.../<id>.htm[l]``) and
    any URL carrying a ``clip_id=<id>`` argument.  Clip pages are resolved
    through the embedded player page, which holds the player configuration.
    """
    # NOTE: the dot before "org" is escaped so that it only matches a literal
    # dot rather than any character.
    _VALID_URL = r'https?://(?:www\.)?memri(?:tv)?\.org/(?:clip(?:/[^/]+)*/(?P<id>\d+)\.html?|.+clip_id=(?P<eid>\d+))'
    IE_NAME = 'memri'
    _TESTS = [{
        'url': 'http://www.memritv.org/clip/en/4496.htm',
        'info_dict': {
            'id': '4496',
            'ext': 'mp4',
            'title': 'Takfiri, The Caliph\'s Favorite Cheese - Anti-ISIS Iraqi Satire',
            'uploader': 'Memri',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    @staticmethod
    def _strip_line_comments(js_source):
        """Remove whole-line ``//`` comments from a JavaScript snippet.

        Only lines consisting of optional whitespace followed by ``//`` are
        dropped.  The newline that terminates a comment line is left in
        place, so several consecutive comment lines are all removed.
        """
        return re.sub(r'\n\s*//[^\n]*', '', js_source)

    @staticmethod
    def _build_direct_format(media_url):
        """Build a format dict for a non-HLS media URL.

        Returns None when media_url carries no ``scheme://`` prefix.  For
        RTMP URLs the ``mp4:...`` play path is cut out of the URL and stored
        under 'play_path'; anything following the play path (typically the
        query string) stays attached to the URL.
        """
        scheme_match = re.search(r'^(.+?)://', media_url)
        if scheme_match is None:
            return None
        proto = scheme_match.group(1)
        fmt = {
            'url': media_url,
            'ext': 'mp4',
            'protocol': proto,
            'format_id': proto + '-mp4',
        }
        if proto == 'rtmp':
            # The trailing group may be empty so that a URL without a query
            # string keeps its complete play path.
            rtmp_match = re.search(r'^(.+?)(mp4:[^\?]+)(.*)', media_url)
            if rtmp_match is not None:
                fmt['url'] = rtmp_match.group(1) + rtmp_match.group(3)
                fmt['play_path'] = rtmp_match.group(2)
        return fmt

    def _real_extract(self, url):
        """Extract id, title, uploader and formats for a single clip.

        Raises ExtractorError when the player configuration lists no usable
        media source; in verbose mode the message includes the raw
        configuration that was searched.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id') or mobj.group('eid')
        if mobj.group('eid') is None:
            # Clip page URL: fetch the embedded player page instead.
            page_url = 'http://www.memritv.org/embedded_player/index.php?clip_id=' + video_id
        else:
            page_url = url

        webpage = self._download_webpage(page_url, video_id)
        jstr = self._search_regex(r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL)
        jstr = self._strip_line_comments(jstr)  # // comments break js_to_json
        js = self._parse_json(jstr, 'json', transform_source=js_to_json)

        formats = []
        for ent in js['media']['source']:
            media_url = ent.get('src')
            if not media_url:
                # Nothing to download for an entry without a source URL.
                continue
            if ent.get('type', '') == 'application/x-mpegURL':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, entry_protocol='m3u8', ext='mp4',
                    m3u8_id='m3u8-mp4',
                    preference=0)
                )
                continue
            fmt = self._build_direct_format(media_url)
            if fmt is not None:
                formats.append(fmt)
        if not formats:
            if self._downloader.params.get('verbose', False):
                raise ExtractorError('No video found in ' + jstr + '\n')
            else:
                raise ExtractorError('No video found')

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': unescapeHTML(js['media']['title']),
            'uploader': 'Memri',
            'formats': formats,
        }
Support memritv.org 2015-07-13 07:43:48 -05:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`
			`from ..utils import (`
			`ExtractorError,`
			`unescapeHTML,`
			`js_to_json,`
			`)`
			`from .common import InfoExtractor`


			`class MemriIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?memri(?:tv)?.org/(?:clip(?:/[^/]+)*/(?P<id>\d+)\.html?|.+clip_id=(?P<eid>\d+))'`
			`IE_NAME = 'memri'`
			`_TESTS = [{`
			`'url': 'http://www.memritv.org/clip/en/4496.htm',`
			`'info_dict': {`
			`'id': '4496',`
			`'ext': 'mp4',`
			`'title': 'Takfiri, The Caliph\'s Favorite Cheese - Anti-ISIS Iraqi Satire',`
			`'uploader': 'Memri',`
			`},`
			`'params': {`
			`# m3u8 download`
			`'skip_download': True,`
			`},`
			`}]`

			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id') or mobj.group('eid')`
			`rurl = url`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`if mobj.groupdict().get('eid') is None:`
			`rurl = 'http://www.memritv.org/embedded_player/index.php?clip_id=' + video_id`
Support memritv.org 2015-07-13 07:43:48 -05:00
			`webpage = self._download_webpage(rurl, video_id)`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`jstr = self._search_regex(r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL)`
			`jstr = re.sub(r'\n\s*//.*?\n', '\n', jstr)  # // comments break js_to_json`
			`js = self._parse_json(jstr, 'json', transform_source=js_to_json)`
Support memritv.org 2015-07-13 07:43:48 -05:00
			`formats = []`
			`for ent in js['media']['source']:`
			`eurl = ent.get('src')`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`if ent.get('type', '') == 'application/x-mpegURL':`
			`formats.extend(self._extract_m3u8_formats(`
			`eurl, video_id, entry_protocol='m3u8', ext='mp4',`
			`m3u8_id='m3u8-mp4',`
			`preference=0)`
Support memritv.org 2015-07-13 07:43:48 -05:00			`)`
			`continue`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`proto = re.search(r'^(.+?)://', eurl).group(1)`
Support memritv.org 2015-07-13 07:43:48 -05:00			`format = {`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`'url': eurl,`
			`'ext': 'mp4',`
			`'protocol': proto,`
			`'format_id': proto + '-mp4',`
Support memritv.org 2015-07-13 07:43:48 -05:00			`}`
			`if proto == 'rtmp':`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`urlre = re.search(r'^(.+?)(mp4:[^\?]+)(.+)', eurl)`
			`format['url'] = urlre.group(1) + urlre.group(3)`
Support memritv.org 2015-07-13 07:43:48 -05:00			`format['play_path'] = urlre.group(2)`
			`formats.append(format)`
			`if not formats:`
			`if self._downloader.params.get('verbose', False):`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`raise ExtractorError('No video found in ' + jstr + '\n')`
Support memritv.org 2015-07-13 07:43:48 -05:00			`else:`
			`raise ExtractorError('No video found')`

			`self._sort_formats(formats)`
			`return {`
memri: fix syntax 2015-07-17 03:02:57 -05:00			`'id': video_id,`
			`'title': unescapeHTML(js['media']['title']),`
			`'uploader': 'Memri',`
			`'formats': formats,`
Support memritv.org 2015-07-13 07:43:48 -05:00			`}`