From 32256e45001daea0f03f4e1c9bd7379838418a0f Mon Sep 17 00:00:00 2001 From: Hannu Lintala Date: Sat, 11 Jul 2015 04:16:35 +0300 Subject: [PATCH] [yle] Add extractor YLEElavaArkisto --- test/test_utils.py | 2 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/yle.py | 96 ++++++++++++++++++++++++++++++ youtube_dl/utils.py | 1 + 4 files changed, 100 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index efa73d0f4..8b25795c3 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -320,6 +320,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') + self.assertEqual(unified_strdate('10.04.2015 07:52:34'), '20150410') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) @@ -343,6 +344,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) + self.assertEqual(unified_timestamp('10.04.2015 07:52:34'), 1428652354) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d60a2d6f9..6ba2480c5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1324,6 +1324,7 @@ from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .yle import ( YLEAreenaIE, + YLEElavaArkistoIE, ) from .ynet import YnetIE from .youjizz import YouJizzIE diff --git a/youtube_dl/extractor/yle.py b/youtube_dl/extractor/yle.py index 7e79ce075..13fe4fef8 100644 --- a/youtube_dl/extractor/yle.py +++ b/youtube_dl/extractor/yle.py @@ -15,6 +15,8 @@ from ..utils import ( intlist_to_bytes, parse_duration, parse_iso8601, + strip_jsonp, + unified_timestamp, ExtractorError, ) from ..aes import ( @@ -240,3 +242,97 @@ class YLEAreenaIE(InfoExtractor): plaintext = intlist_to_bytes(decrypted_data) return plaintext + + +class YLEElavaArkistoIE(YLEAreenaIE): + _VALID_URL = r'http://(?:www\.)?yle\.fi/aihe.*/(?P[^?#]+).*' + _PROTOCOLS = ['RTMPE', 'HDS'] + + _TESTS = [ + { + 'url': 'http://yle.fi/aihe/artikkeli/2006/10/02/sukellusvenematkailu', + 'info_dict': { + 'title': 'Sukellusvenematkailu', + 'description': 'md5:73535674a03844ee4c42f48f857d1a24', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '6-2ab2e2094cea469bbaf800246ce71145', + 'ext': 'flv', + 'title': 'Wärtsilä rakentaa sukellusveneen', + 'description': 'md5:04d5d641fc744e1244e698f9e4523c24', + 'duration': 45, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1317652519, + 'upload_date': '20111003', + }, + }, + { + 'info_dict': { + 'id': '6-60c5f932221940d9a5a0814aafb0b709', + 'ext': 'flv', + 'title': 'Sukellusvenejuttu ja säätiedotus', + 'description': 'md5:04d5d641fc744e1244e698f9e4523c24', + 'duration': 162, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1317652519, + 'upload_date': '20111003', + }, + }, + ], + 'params': { + 'skip_download': True, + }, + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id, 'Downloading article') + + clip_ids = re.findall(r'data-id="([^"]+)"', self._search_regex( + r'(?s)]+class=(["\']).*?\bcontent\b.*?\1[^>]*>(?P.*?)]*class=(["\'])ydd-categories\3[^>]*>', + webpage, 'Article content', default='', group='content')) + + playlist = [] + for clip_id in clip_ids: + mediaid = clip_id.split('-')[-1] + mediaurl = 'http://player.yle.fi/api/v1/elavaarkisto.jsonp?' \ + 'id={mediaid}'.format(mediaid=mediaid) + + data = self._download_json( + mediaurl, mediaid, transform_source=strip_jsonp).get( + 'data', {}).get('ea', {}) + + media_kanta_id = data.get('mediakantaId', None) + + if not media_kanta_id: + continue + media_id = '6-' + media_kanta_id + + formats, subtitles = self._extract_formats(media_id) + self._sort_formats(formats) + + playlist.append({ + 'id': media_id, + 'title': data.get('otsikko'), + 'description': data.get('description'), + 'formats': formats, + 'timestamp': unified_timestamp(data.get('published_DateTime')), + 'duration': parse_duration(data.get('duration')), + 'subtitles': subtitles, + 'thumbnail': data.get('previewImage'), + 'series': data.get('originalTitle'), + }) + + if not playlist: + raise ExtractorError('Unable to extract metadata') + + if len(playlist) == 1: + return playlist[0] + + return self.playlist_result( + playlist, + playlist_title=self._og_search_title(webpage), + playlist_description=self._og_search_description(webpage)) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 92b22e639..495555f40 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -168,6 +168,7 @@ DATE_FORMATS_DAY_FIRST.extend([ '%d.%m.%y', '%d/%m/%Y', '%d/%m/%y', + '%d.%m.%Y %H:%M:%S', '%d/%m/%Y %H:%M:%S', ])