From d24ace4c8fbe93a11133ddc1f4629b5626ec6bd8 Mon Sep 17 00:00:00 2001 From: luboss Date: Fri, 2 Jun 2017 22:44:39 +0200 Subject: [PATCH 1/3] [Joj] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/joj.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100755 youtube_dl/extractor/joj.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ed603eb29..98b93d99d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -463,6 +463,7 @@ from .jamendo import ( ) from .jeuxvideo import JeuxVideoIE from .jove import JoveIE +from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py new file mode 100755 index 000000000..dd04fae28 --- /dev/null +++ b/youtube_dl/extractor/joj.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class JojIE(InfoExtractor): + _VALID_URL = r'https?://(?P[a-z0-9]+\.)joj\.sk/([^/]+/)*(?P(?P[0-9]{4}(-[0-9]{2}){2}).*)' # noqa + _TESTS = [ { + 'url': 'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa + 'md5': '731727f2caf35a3fcaf556853f92b6e1', + 'info_dict': { + 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', + 'ext': 'mp4', + 'title': '2017-05-28 - Nové Bývanie' + } + }, { + 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', # noqa + 'md5': '13626f2d9e237a17ea72bcaaf2738311', + 'info_dict': { + 'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad', + 'ext': 'mp4', + 'title': '2016-09-06 - Starí Rodičia' + } + } ] + # http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia + # https://velkenoviny.joj.sk/archiv/2017-05-29-noviny-tv-joj + def _real_extract(self, url): + title_query = self._search_regex(self._VALID_URL, url, 'title_query', + group='url_title') + timestamp = self._search_regex(self._VALID_URL, url, 'timestamp', + group='timestamp', fatal=False) + # timestamp = '2017-05-28' + webpage = self._download_webpage(url, title_query) + title_simple = self._og_search_title(webpage).title() + title = "{timestamp} - {title_simple}".format(**locals()) + video_id = self._html_search_regex( + r'