From f9cad05465d525fcac22bdc8af6ac6ed2186052a Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Thu, 26 Jan 2017 16:51:04 +0100 Subject: [PATCH] [euronews] Add new information extractor --- youtube_dl/extractor/euronews.py | 61 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 62 insertions(+) create mode 100644 youtube_dl/extractor/euronews.py diff --git a/youtube_dl/extractor/euronews.py b/youtube_dl/extractor/euronews.py new file mode 100644 index 000000000..e4110a882 --- /dev/null +++ b/youtube_dl/extractor/euronews.py @@ -0,0 +1,61 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + remove_end, + unified_strdate, +) + + +class EuronewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:[a-z]+\.)?euronews\.com/(?P\d+/\d+/\d+/[^/]+$)' + _TESTS = [{ + 'url': 'http://de.euronews.com/2017/01/24/the-brief-from-brussels-martin-schulz-tritt-gegen-angela-merkel-an', + 'md5': '32d56fdbe4778354ff4afcd0ef97c1c8', + 'info_dict': { + 'id': '2017/01/24/the-brief-from-brussels-martin-schulz-tritt-gegen-angela-merkel-an', + 'ext': 'mp4', + 'title': 'The Brief from Brussels: Martin Schulz tritt gegen Angela Merkel an', + 'description': 'md5:a49ceceb9f277cd93a4836bfc54498f1', + 'upload_date': '20170124', + 'thumbnail': 'http://static.euronews.com/articles/355867/1000x563_355867.jpg', + }, + }, { + 'url': 'http://www.euronews.com/2017/01/25/team-usa-takes-gold-at-chef-olympics-in-france', + 'info_dict': { + 'id': '2017/01/25/team-usa-takes-gold-at-chef-olympics-in-france', + 'ext': 'mp4', + 'title': '''Team USA takes gold at 'Chef Olympics' in France''', + 'description': 'md5:a1d7f4dd524a46a66d201e0634dc5aee', + 'upload_date': '20170125', + 'thumbnail': 'http://static.euronews.com/articles/356014/1000x563_356014.jpg' + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + if title: + title = remove_end(title, '| Euronews').strip() + description = self._og_search_description(webpage) + + video_url = self._og_search_video_url(webpage) + thumbnail = self._og_search_thumbnail(webpage) + upload_date = self._html_search_meta('date.created', webpage) + if upload_date: + upload_date = unified_strdate(upload_date) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f09b4cf2c..6a8a03acc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -280,6 +280,7 @@ from .espn import ( ESPNArticleIE, ) from .esri import EsriVideoIE +from .euronews import EuronewsIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE from .expotv import ExpoTVIE