l1ving_youtube-dl/youtube_dl/extractor/byutv.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import ExtractorError


class BYUtvIE(InfoExtractor):
    """Extractor for regular (non-event) BYUtv episode pages.

    The episode page embeds a JavaScript ``episode: {...}`` object literal
    that carries the title, description, thumbnail and the id of the video
    at the hosting provider.  Only the Ooyala provider is handled; the
    actual media extraction is delegated to the Ooyala extractor through a
    ``url_transparent`` result.
    """

    # Both the older ``/watch/`` and the newer ``/player/`` URL layouts are
    # accepted.  The negative lookahead leaves ``.../event/...`` URLs to
    # BYUtvEventIE.
    _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
    _TESTS = [{
        'url': 'http://www.byutv.org/player/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
        'info_dict': {
            'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
            'display_id': 'studio-c-season-5-episode-5',
            'ext': 'mp4',
            'title': 'Season 5 Episode 5',
            'description': 'md5:e07269172baff037f8e8bf9956bc9747',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1486.486,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://www.byutv.org/player/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
        'only_matching': True,
    }, {
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result pointing at the Ooyala video.

        Raises ExtractorError when the embedded episode object names a
        provider other than Ooyala (or names no provider at all).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # The slug is optional in the URL; fall back to the id for messages.
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)
        # Grab the JavaScript object literal assigned to ``episode:``, up to
        # the closing brace that is followed by a comma and a line break.
        episode_code = self._search_regex(
            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')

        # The literal uses bare keys and single-quoted values, which is not
        # valid JSON: rewrite each ``key: 'value'`` pair that starts a line
        # into ``"key": "value"`` before parsing.
        ep = self._parse_json(
            episode_code, display_id, transform_source=lambda s:
            re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))

        provider_type = ep.get('providerType')
        if provider_type != 'Ooyala':
            # Report the value that was actually tested; the page data is
            # not known to contain a separate 'provider' key.
            raise ExtractorError('Unsupported provider %s' % provider_type)

        return {
            '_type': 'url_transparent',
            'ie_key': 'Ooyala',
            'url': 'ooyala:%s' % ep['providerId'],
            'id': video_id,
            'display_id': display_id,
            'title': ep['title'],
            'description': ep.get('description'),
            'thumbnail': ep.get('imageThumbnail'),
        }


class BYUtvEventIE(InfoExtractor):
    """Extractor for BYUtv event pages (``.../event/<id>``).

    The Ooyala content id and the title are scraped from the page markup
    and the media extraction itself is delegated to the Ooyala extractor
    through a ``url_transparent`` result.
    """

    # Accept both the ``/watch/event/`` and ``/player/event/`` layouts so
    # that the test URL below (which uses ``/watch/``) is matched as well.
    _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/event/(?P<id>[0-9a-f-]+)'
    _TEST = {
        'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
        'info_dict': {
            'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
            'ext': 'mp4',
            'title': 'Toledo vs. BYU (9/30/16)',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
        """Build a ``url_transparent`` result pointing at the Ooyala video."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # ``providerId: '<id>'`` with either quote style; the backreference
        # makes the value end at the matching closing quote.
        ooyala_id = self._search_regex(
            r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'ooyala id', group='id')

        # The title is the <h1> directly inside the element whose class is
        # "description".
        title = self._search_regex(
            r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
            'title').strip()

        return {
            '_type': 'url_transparent',
            'ie_key': 'Ooyala',
            'url': 'ooyala:%s' % ooyala_id,
            'id': video_id,
            'title': title,
        }
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
[byutv] Fix test 2014-04-10 19:37:55 +07:00			`from ..utils import ExtractorError`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00

			`class BYUtvIE(InfoExtractor):`
Fix some problems with BYU TV Ok, a bit of a beginner here. I ran into an issue when trying to download an episode of Studio C from BYU TV. Basically, any links from the site aren't recognized as valid links. This is a reasonably large problem, but it was also fairly easy to solve. Essentially, in the URL, they changed watch to player. So I changed that, and links were recognized again. That's progress. But, it still spits out the error "ERROR: Unable to extract episode information" I've narrowed this down to line 38 I believe. I don't really know how to fix this error, since it seems to be calling another function within common.py at line 2581. I don't really have the expertise to diagnose this any further. Hopefully someone else will see this and know what to do. I'm committing this because regardless of the fact that an error still exists, this fixes one. I would spend more time on this, but it's nearing 1 am and I have a feeling I'll forget what I was doing when I wake up. I'll create a issue and reference this pull request as well. Thanks for the help, Micah. 2017-12-13 00:40:19 -05:00			`_VALID_URL = r'https?://(?:www\.)?byutv\.org/player/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`_TESTS = [{`
Fix some problems with BYU TV Ok, a bit of a beginner here. I ran into an issue when trying to download an episode of Studio C from BYU TV. Basically, any links from the site aren't recognized as valid links. This is a reasonably large problem, but it was also fairly easy to solve. Essentially, in the URL, they changed watch to player. So I changed that, and links were recognized again. That's progress. But, it still spits out the error "ERROR: Unable to extract episode information" I've narrowed this down to line 38 I believe. I don't really know how to fix this error, since it seems to be calling another function within common.py at line 2581. I don't really have the expertise to diagnose this any further. Hopefully someone else will see this and know what to do. I'm committing this because regardless of the fact that an error still exists, this fixes one. I would spend more time on this, but it's nearing 1 am and I have a feeling I'll forget what I was doing when I wake up. I'll create a issue and reference this pull request as well. Thanks for the help, Micah. 2017-12-13 00:40:19 -05:00			`'url': 'http://www.byutv.org/player/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00			`'info_dict': {`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',`
			`'display_id': 'studio-c-season-5-episode-5',`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00			`'ext': 'mp4',`
[byutv] Update test 2014-11-05 15:43:53 +01:00			`'title': 'Season 5 Episode 5',`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`'description': 'md5:e07269172baff037f8e8bf9956bc9747',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 20:08:07 +08:00			`'thumbnail': r're:^https?://.*\.jpg$',`
[ooyala] fix duration scale 2015-12-04 16:18:02 +01:00			`'duration': 1486.486,`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00			`},`
skip some tests to reduce test time 2016-05-24 16:42:22 +01:00			`'params': {`
			`'skip_download': True,`
			`},`
[ooyala] check manifest ext with determine_ext and update tests for related extractors 2016-05-24 11:24:29 +01:00			`'add_ie': ['Ooyala'],`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`}, {`
Fix some problems with BYU TV Ok, a bit of a beginner here. I ran into an issue when trying to download an episode of Studio C from BYU TV. Basically, any links from the site aren't recognized as valid links. This is a reasonably large problem, but it was also fairly easy to solve. Essentially, in the URL, they changed watch to player. So I changed that, and links were recognized again. That's progress. But, it still spits out the error "ERROR: Unable to extract episode information" I've narrowed this down to line 38 I believe. I don't really know how to fix this error, since it seems to be calling another function within common.py at line 2581. I don't really have the expertise to diagnose this any further. Hopefully someone else will see this and know what to do. I'm committing this because regardless of the fact that an error still exists, this fixes one. I would spend more time on this, but it's nearing 1 am and I have a feeling I'll forget what I was doing when I wake up. I'll create a issue and reference this pull request as well. Thanks for the help, Micah. 2017-12-13 00:40:19 -05:00			`'url': 'http://www.byutv.org/player/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`'only_matching': True,`
			`}]`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00
			`def _real_extract(self, url):`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`
			`display_id = mobj.group('display_id') or video_id`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`webpage = self._download_webpage(url, display_id)`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00			`episode_code = self._search_regex(`
			`r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')`
[byutv] Rely on _match_id and _parse_json 2016-09-30 19:59:08 +02:00
			`ep = self._parse_json(`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`episode_code, display_id, transform_source=lambda s:`
[byutv] Rely on _match_id and _parse_json 2016-09-30 19:59:08 +02:00			`re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00			`if ep['providerType'] != 'Ooyala':`
[byutv] Add support (Fixes #2612) 2014-03-24 17:12:15 +01:00			`raise ExtractorError('Unsupported provider %s' % ep['provider'])`
[byutv] Fix id and display id 2016-10-02 00:44:54 +07:00
			`return {`
			`'_type': 'url_transparent',`
			`'ie_key': 'Ooyala',`
			`'url': 'ooyala:%s' % ep['providerId'],`
			`'id': video_id,`
			`'display_id': display_id,`
			`'title': ep['title'],`
			`'description': ep.get('description'),`
			`'thumbnail': ep.get('imageThumbnail'),`
			`}`
[byutv:event] Add extractor 2016-10-02 00:50:07 +07:00

			`class BYUtvEventIE(InfoExtractor):`
Fix some problems with BYU TV Ok, a bit of a beginner here. I ran into an issue when trying to download an episode of Studio C from BYU TV. Basically, any links from the site aren't recognized as valid links. This is a reasonably large problem, but it was also fairly easy to solve. Essentially, in the URL, they changed watch to player. So I changed that, and links were recognized again. That's progress. But, it still spits out the error "ERROR: Unable to extract episode information" I've narrowed this down to line 38 I believe. I don't really know how to fix this error, since it seems to be calling another function within common.py at line 2581. I don't really have the expertise to diagnose this any further. Hopefully someone else will see this and know what to do. I'm committing this because regardless of the fact that an error still exists, this fixes one. I would spend more time on this, but it's nearing 1 am and I have a feeling I'll forget what I was doing when I wake up. I'll create a issue and reference this pull request as well. Thanks for the help, Micah. 2017-12-13 00:40:19 -05:00			`_VALID_URL = r'https?://(?:www\.)?byutv\.org/player/event/(?P<id>[0-9a-f-]+)'`
[byutv:event] Add extractor 2016-10-02 00:50:07 +07:00			`_TEST = {`
			`'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',`
			`'info_dict': {`
			`'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',`
			`'ext': 'mp4',`
			`'title': 'Toledo vs. BYU (9/30/16)',`
			`},`
			`'params': {`
			`'skip_download': True,`
			`},`
			`'add_ie': ['Ooyala'],`
			`}`

			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

			`webpage = self._download_webpage(url, video_id)`

			`ooyala_id = self._search_regex(`
			`r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',`
			`webpage, 'ooyala id', group='id')`

			`title = self._search_regex(`
			`r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,`
			`'title').strip()`

			`return {`
			`'_type': 'url_transparent',`
			`'ie_key': 'Ooyala',`
			`'url': 'ooyala:%s' % ooyala_id,`
			`'id': video_id,`
			`'title': title,`
			`}`