[yle] Add extractor YLEElavaArkisto
This commit is contained in:
parent
43e449f2e9
commit
32256e4500
@ -320,6 +320,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
|
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
|
||||||
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
|
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
|
||||||
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
|
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
|
||||||
|
self.assertEqual(unified_strdate('10.04.2015 07:52:34'), '20150410')
|
||||||
|
|
||||||
def test_unified_timestamps(self):
|
def test_unified_timestamps(self):
|
||||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||||
@ -343,6 +344,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||||
|
self.assertEqual(unified_timestamp('10.04.2015 07:52:34'), 1428652354)
|
||||||
|
|
||||||
def test_determine_ext(self):
|
def test_determine_ext(self):
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||||
|
@ -1324,6 +1324,7 @@ from .yesjapan import YesJapanIE
|
|||||||
from .yinyuetai import YinYueTaiIE
|
from .yinyuetai import YinYueTaiIE
|
||||||
from .yle import (
|
from .yle import (
|
||||||
YLEAreenaIE,
|
YLEAreenaIE,
|
||||||
|
YLEElavaArkistoIE,
|
||||||
)
|
)
|
||||||
from .ynet import YnetIE
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
|
@ -15,6 +15,8 @@ from ..utils import (
|
|||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_jsonp,
|
||||||
|
unified_timestamp,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
@ -240,3 +242,97 @@ class YLEAreenaIE(InfoExtractor):
|
|||||||
plaintext = intlist_to_bytes(decrypted_data)
|
plaintext = intlist_to_bytes(decrypted_data)
|
||||||
|
|
||||||
return plaintext
|
return plaintext
|
||||||
|
|
||||||
|
|
||||||
|
class YLEElavaArkistoIE(YLEAreenaIE):
    """Extract the media clips embedded in a yle.fi ``/aihe`` article page.

    Each ``data-id`` found in the article content is looked up through the
    player.yle.fi ``elavaarkisto`` JSONP API; the ``mediakantaId`` it returns
    is then resolved to formats and subtitles by the inherited
    ``_extract_formats``.
    """

    # Accept both http and https article links.
    _VALID_URL = r'https?://(?:www\.)?yle\.fi/aihe.*/(?P<id>[^?#]+).*'
    # NOTE(review): presumably read by the inherited format extraction
    # in YLEAreenaIE -- confirm against the parent class.
    _PROTOCOLS = ['RTMPE', 'HDS']

    _TESTS = [
        {
            'url': 'http://yle.fi/aihe/artikkeli/2006/10/02/sukellusvenematkailu',
            'info_dict': {
                'title': 'Sukellusvenematkailu',
                'description': 'md5:73535674a03844ee4c42f48f857d1a24',
            },
            'playlist': [
                {
                    'info_dict': {
                        'id': '6-2ab2e2094cea469bbaf800246ce71145',
                        'ext': 'flv',
                        'title': 'Wärtsilä rakentaa sukellusveneen',
                        'description': 'md5:04d5d641fc744e1244e698f9e4523c24',
                        'duration': 45,
                        # Raw string: "\." is an invalid escape in a plain literal.
                        'thumbnail': r're:^https?://.*\.jpg$',
                        'timestamp': 1317652519,
                        'upload_date': '20111003',
                    },
                },
                {
                    'info_dict': {
                        'id': '6-60c5f932221940d9a5a0814aafb0b709',
                        'ext': 'flv',
                        'title': 'Sukellusvenejuttu ja säätiedotus',
                        'description': 'md5:04d5d641fc744e1244e698f9e4523c24',
                        'duration': 162,
                        'thumbnail': r're:^https?://.*\.jpg$',
                        'timestamp': 1317652519,
                        'upload_date': '20111003',
                    },
                },
            ],
            'params': {
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the article's clips as one entry or as a playlist.

        A single usable clip is returned directly; several clips are wrapped
        in a playlist titled and described from the page's OpenGraph data.

        Raises ExtractorError when no clip on the page yields a
        ``mediakantaId``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id, 'Downloading article')

        # Restrict the clip search to the markup between the "content" div
        # and the "ydd-categories" div.  default='' keeps a failed match
        # non-fatal here, so it ends in the ExtractorError further down.
        article_content = self._search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\bcontent\b.*?\1[^>]*>(?P<content>.*?)<div[^>]*class=(["\'])ydd-categories\3[^>]*>',
            webpage, 'Article content', default='', group='content')
        clip_ids = re.findall(r'data-id="([^"]+)"', article_content)

        entries = []
        for clip_id in clip_ids:
            # The API is queried with the part of data-id after the last dash.
            mediaid = clip_id.split('-')[-1]
            response = self._download_json(
                'http://player.yle.fi/api/v1/elavaarkisto.jsonp?id=%s' % mediaid,
                mediaid, transform_source=strip_jsonp)
            if not isinstance(response, dict):
                continue

            # "data" and "ea" may be absent or explicitly null; treat both
            # as "no metadata" instead of failing on None.get().
            data = (response.get('data') or {}).get('ea') or {}

            media_kanta_id = data.get('mediakantaId')
            if not media_kanta_id:
                # Without a mediakantaId there is nothing to extract.
                continue
            media_id = '6-' + media_kanta_id

            formats, subtitles = self._extract_formats(media_id)
            self._sort_formats(formats)

            entries.append({
                'id': media_id,
                'title': data.get('otsikko'),
                'description': data.get('description'),
                'formats': formats,
                'timestamp': unified_timestamp(data.get('published_DateTime')),
                'duration': parse_duration(data.get('duration')),
                'subtitles': subtitles,
                'thumbnail': data.get('previewImage'),
                'series': data.get('originalTitle'),
            })

        if not entries:
            raise ExtractorError('Unable to extract metadata')

        if len(entries) == 1:
            return entries[0]

        return self.playlist_result(
            entries,
            playlist_title=self._og_search_title(webpage),
            playlist_description=self._og_search_description(webpage))
|
||||||
|
@ -168,6 +168,7 @@ DATE_FORMATS_DAY_FIRST.extend([
|
|||||||
'%d.%m.%y',
|
'%d.%m.%y',
|
||||||
'%d/%m/%Y',
|
'%d/%m/%Y',
|
||||||
'%d/%m/%y',
|
'%d/%m/%y',
|
||||||
|
'%d.%m.%Y %H:%M:%S',
|
||||||
'%d/%m/%Y %H:%M:%S',
|
'%d/%m/%Y %H:%M:%S',
|
||||||
])
|
])
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user