diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py index dd04fae28..601402f39 100755 --- a/youtube_dl/extractor/joj.py +++ b/youtube_dl/extractor/joj.py @@ -2,52 +2,55 @@ from __future__ import unicode_literals from .common import InfoExtractor +import re class JojIE(InfoExtractor): - _VALID_URL = r'https?://(?P[a-z0-9]+\.)joj\.sk/([^/]+/)*(?P(?P[0-9]{4}(-[0-9]{2}){2}).*)' # noqa - _TESTS = [ { + _VALID_URL = r'https?://([a-z0-9]+\.)joj\.sk/([^/]+/)*(?P(?P[0-9]{4}(-[0-9]{2}){2}).*)' # noqa + _TESTS = [{ 'url': 'https://www.joj.sk/nove-byvanie/archiv/2017-05-28-nove-byvanie', # noqa - 'md5': '731727f2caf35a3fcaf556853f92b6e1', 'info_dict': { 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', 'ext': 'mp4', - 'title': '2017-05-28 - Nové Bývanie' + 'title': 'Nové Bývanie', + 'release_date': '20170528' } }, { - 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', # noqa - 'md5': '13626f2d9e237a17ea72bcaaf2738311', + 'url': 'http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia', 'info_dict': { 'id': 'f18b2c5f-9ea8-4941-a164-a814c53306ad', 'ext': 'mp4', - 'title': '2016-09-06 - Starí Rodičia' + 'title': 'Starí Rodičia', + 'release_date': '20160906' } - } ] - # http://nasi.joj.sk/epizody/2016-09-06-stari-rodicia - # https://velkenoviny.joj.sk/archiv/2017-05-29-noviny-tv-joj + }] + + media_src_url = 'http://n16.joj.sk/storage/' + xml_source_url = 'https://media.joj.sk/services/Video.php?clip=' + def _real_extract(self, url): - title_query = self._search_regex(self._VALID_URL, url, 'title_query', - group='url_title') - timestamp = self._search_regex(self._VALID_URL, url, 'timestamp', - group='timestamp', fatal=False) - # timestamp = '2017-05-28' - webpage = self._download_webpage(url, title_query) - title_simple = self._og_search_title(webpage).title() - title = "{timestamp} - {title_simple}".format(**locals()) + mobj = re.match(self._VALID_URL, url) + title_query = mobj.group('title_query') + release_date = mobj.group('release_date').replace('-', '') + webpage = self._download_webpage(url, 'video_id') video_id = self._html_search_regex( - r'