l1ving_youtube-dl/youtube_dl/extractor/cba.py

# coding: utf-8
from __future__ import unicode_literals

import datetime
import os

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
    strip_bom_utf8,
    RegexNotFoundError,
    UnavailableVideoError,
    update_url_query,
)

class CBAIE(InfoExtractor):
    IE_NAME = 'cba'
    IE_DESC = 'cultural broadcasting archive'
    _VALID_URL = r'https?://(?:www\.)?cba\.fro\.at/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://cba.fro.at/320619',
        'md5': 'e40379688fcc5e95d6d8a482bb665b02',
        'info_dict': {
            'id': '320619',
            'ext': 'mp3',
            'title': 'Radio Netwatcher Classics vom 15.7.2016 – Peter Pilz, Sicherheitssprecher Grüne über die nationale Entwicklung zum Überwachungsstaat',
            'url': 'https://cba.fro.at/wp-content/uploads/radio_netwatcher/netwatcher-20160715.mp3',
        }
    }
    _FORMATS = {
        'audio/ogg': {'id': '1', 'ext': 'ogg', 'preference': 100},
        'audio/mpeg': {'id': '2', 'ext': 'mp3', 'preference': 50}
    }
    _API_KEY = None

    def __init__(self, *args, **kwargs):
        try:
            self._API_KEY = os.environ["CBA_API_KEY"]
        except KeyError:
            pass

    def _add_optional_parameter(self, formats, name, data, key, convert=None):
        try:
            param = data[key]
            if convert:
                param = convert(param)
            formats[name] = param
        except KeyError:
            pass

    def _real_extract(self, url):
        video_id = self._match_id(url)
        api_posts_url = "https://cba.fro.at/wp-json/wp/v2/posts/%s" % video_id
        api_media_url = "https://cba.fro.at/wp-json/wp/v2/media?media_type=audio&parent=%s" % video_id

        title = 'unknown'
        description = ''
        formats = []

        posts_result = self._download_json(api_posts_url, video_id, 'query posts api-endpoint',
                                           'unable to query posts api-endpoint', transform_source=strip_bom_utf8)
        try:
            title = clean_html(posts_result['title']['rendered'])
            description = clean_html(posts_result['content']['rendered'])
        except KeyError:
            pass

        api_key_str = " (without API_KEY)"
        if self._API_KEY:
            api_key_str =  " (using API_KEY '%s')" % self._API_KEY
            api_media_url = update_url_query(api_media_url, {'c': self._API_KEY})

        media_result = self._download_json(api_media_url, video_id, 'query media api-endpoint%s' % api_key_str,
                                         'unable to qeury media api-endpoint%s' % api_key_str, transform_source=strip_bom_utf8)
        for media in media_result:
            try:
                url = media['source_url']
                if url == "":
                    continue

                ft = media['mime_type']
                f = { 'url': url, 'format': ft, 'format_id': self._FORMATS[ft]['id'], 'preference': self._FORMATS[ft]['preference'] }
                self._add_optional_parameter(f, 'filesize', media['media_details'], 'filesize')
                self._add_optional_parameter(f, 'abr', media['media_details'], 'bitrate', lambda x: x/1000)
                self._add_optional_parameter(f, 'asr', media['media_details'], 'sample_rate')

                formats.append(f)
            except KeyError:
                pass

        if len(formats) == 0:
            if self._API_KEY:
                raise ExtractorError('unable to fetch CBA entry')
            else:
                raise UnavailableVideoError('you may need an API key to download copyright protected files')

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }