[yle] Add extractor YLEElavaArkisto
This commit is contained in:
parent
43e449f2e9
commit
32256e4500
@ -320,6 +320,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
|
||||
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
|
||||
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
|
||||
self.assertEqual(unified_strdate('10.04.2015 07:52:34'), '20150410')
|
||||
|
||||
def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||
@ -343,6 +344,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
self.assertEqual(unified_timestamp('10.04.2015 07:52:34'), 1428652354)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
|
@ -1324,6 +1324,7 @@ from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .yle import (
|
||||
YLEAreenaIE,
|
||||
YLEElavaArkistoIE,
|
||||
)
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
|
@ -15,6 +15,8 @@ from ..utils import (
|
||||
intlist_to_bytes,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..aes import (
|
||||
@ -240,3 +242,97 @@ class YLEAreenaIE(InfoExtractor):
|
||||
plaintext = intlist_to_bytes(decrypted_data)
|
||||
|
||||
return plaintext
|
||||
|
||||
|
||||
class YLEElavaArkistoIE(YLEAreenaIE):
    """Extractor for yle.fi "aihe" article pages that embed archive clips.

    The article HTML is scanned for ``data-id`` attributes; each one is
    resolved through the ``player.yle.fi`` ``elavaarkisto.jsonp`` endpoint to
    a ``mediakantaId``, which is then turned into formats and subtitles by
    ``_extract_formats``.

    NOTE(review): ``_extract_formats`` and the consumer of ``_PROTOCOLS`` are
    not defined in this class; presumably both come from ``YLEAreenaIE`` --
    confirm against the parent class.
    """

    # Accepts both http and https article URLs; the id is the last path
    # segment (everything after the final '/' up to '?' or '#').
    _VALID_URL = r'https?://(?:www\.)?yle\.fi/aihe.*/(?P<id>[^?#]+).*'
    _PROTOCOLS = ['RTMPE', 'HDS']

    _TESTS = [
        {
            'url': 'http://yle.fi/aihe/artikkeli/2006/10/02/sukellusvenematkailu',
            'info_dict': {
                'title': 'Sukellusvenematkailu',
                'description': 'md5:73535674a03844ee4c42f48f857d1a24',
            },
            'playlist': [
                {
                    'info_dict': {
                        'id': '6-2ab2e2094cea469bbaf800246ce71145',
                        'ext': 'flv',
                        'title': 'Wärtsilä rakentaa sukellusveneen',
                        'description': 'md5:04d5d641fc744e1244e698f9e4523c24',
                        'duration': 45,
                        # Raw string: '\.' is an invalid escape sequence in a
                        # normal string literal.
                        'thumbnail': r're:^https?://.*\.jpg$',
                        'timestamp': 1317652519,
                        'upload_date': '20111003',
                    },
                },
                {
                    'info_dict': {
                        'id': '6-60c5f932221940d9a5a0814aafb0b709',
                        'ext': 'flv',
                        'title': 'Sukellusvenejuttu ja säätiedotus',
                        'description': 'md5:04d5d641fc744e1244e698f9e4523c24',
                        'duration': 162,
                        'thumbnail': r're:^https?://.*\.jpg$',
                        'timestamp': 1317652519,
                        'upload_date': '20111003',
                    },
                },
            ],
            'params': {
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Extract every clip embedded in the article at *url*.

        Returns a single info dict when exactly one clip could be resolved,
        otherwise a playlist result titled and described from the page's
        Open Graph metadata.

        Raises ExtractorError when no clip on the page could be resolved.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id, 'Downloading article')

        # Restrict the data-id scan to the article body; default='' means a
        # page without that markup simply yields no clips (handled below).
        article_html = self._search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\bcontent\b.*?\1[^>]*>(?P<content>.*?)<div[^>]*class=(["\'])ydd-categories\3[^>]*>',
            webpage, 'Article content', default='', group='content')
        clip_ids = re.findall(r'data-id="([^"]+)"', article_html)

        playlist = []
        for clip_id in clip_ids:
            # Only the part after the last '-' is sent to the API.
            mediaid = clip_id.split('-')[-1]
            mediaurl = (
                'http://player.yle.fi/api/v1/elavaarkisto.jsonp?id=%s' % mediaid)

            response = self._download_json(
                mediaurl, mediaid, transform_source=strip_jsonp)
            # Use `or {}` instead of .get(key, {}) so that an explicit JSON
            # null for 'data' or 'ea' does not raise AttributeError.
            data = ((response or {}).get('data') or {}).get('ea') or {}

            media_kanta_id = data.get('mediakantaId')
            if not media_kanta_id:
                # Clip has no playable media reference; skip it.
                continue
            media_id = '6-' + media_kanta_id

            formats, subtitles = self._extract_formats(media_id)
            self._sort_formats(formats)

            playlist.append({
                'id': media_id,
                'title': data.get('otsikko'),
                'description': data.get('description'),
                'formats': formats,
                'timestamp': unified_timestamp(data.get('published_DateTime')),
                'duration': parse_duration(data.get('duration')),
                'subtitles': subtitles,
                'thumbnail': data.get('previewImage'),
                'series': data.get('originalTitle'),
            })

        if not playlist:
            raise ExtractorError('Unable to extract metadata')

        if len(playlist) == 1:
            return playlist[0]

        return self.playlist_result(
            playlist,
            playlist_title=self._og_search_title(webpage),
            playlist_description=self._og_search_description(webpage))
|
||||
|
@ -168,6 +168,7 @@ DATE_FORMATS_DAY_FIRST.extend([
|
||||
'%d.%m.%y',
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%d.%m.%Y %H:%M:%S',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
])
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user