diff --git a/youtube_dl/extractor/canalu.py b/youtube_dl/extractor/canalu.py new file mode 100644 index 000000000..645f9b5a2 --- /dev/null +++ b/youtube_dl/extractor/canalu.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + month_by_name, + unescapeHTML +) +from re import DOTALL + + +class CanalUIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?canal-u\.tv/video/(?P.*)' + _TESTS = [ + { + 'url': 'https://www.canal-u.tv/video/ecole_normale_superieure_de_lyon/gouvernement.3118', + 'md5': '9c185d26b232c3c06d805c0d639af254', + 'info_dict': { + 'id': 'ecole_normale_superieure_de_lyon/gouvernement.3118', + 'ext': 'mp4', + 'duration': 600, + 'creator': 'SENELLART Michel', + 'title': 'Gouvernement', + 'description': 'Les essentiels : La philo par les mots - Gouvernement', + 'thumbnail': 'https://www.canal-u.tv/media/images/groupe_ens_lsh/gouvernement_3118/vignette.les.essentiels.jpg', + 'release_date': '20071015'} + }, + { + 'url': 'https://www.canal-u.tv/video/ecole_normale_superieure_de_lyon/les_competences_en_situation_d_apprentissage.20850', + 'md5': 'f06aab78bf60c2a2340a733c18a5ef10', + 'info_dict': { + 'id': 'ecole_normale_superieure_de_lyon/les_competences_en_situation_d_apprentissage.20850', + 'ext': 'mp4', + 'duration': 360, + 'creator': 'COULET Jean-Claude', + 'title': 'Les compétences en situation d\'apprentissage', + 'description': 'Cette capsule présente comment on peut décliner la notion de compétence,\r dans les situations pédagogiques, en donnant un sens précis aux \r concepts de situation, tâche, et activité. Elle ouvre des pistes de \r réflexion sur l\'articulation de ces notions dans les situations \r d\'éducation et de formation.', + 'thumbnail': 'https://www.canal-u.tv/media/images/groupe_ens_lsh/les.comp.tences.en.situation.d.apprentissage_20850/craies.jpg', + 'release_date': '20151215', + } + }] + + def _real_extract(self, url): + video = {} + video_id = self._match_id(url) + video['id'] = video_id + webpage = self._download_webpage(url, video_id) + + video['title'] = self._og_search_title(webpage) + video['url'] = self._html_search_regex(r'file: "(.*?\.mp4)",', webpage, 'url') + video['ext'] = 'mp4' + + # Thumbnail + video['thumbnail'] = self._og_search_thumbnail(webpage, default=None) + # Description + description_regex = r'
.*?

\s*(.*?)\s*

.*?
' + video['description'] = self._html_search_regex(description_regex, webpage, 'description', flags=DOTALL, default=None) + # Other fields + for field in [ + ['duration', 'Durée du programme', '(\d+) min'], + ['creator', 'Auteur\(s\)', '(.*?)'], + ['release_date', 'Date de réalisation', '(.*?)'], + ]: + regex = r'
{0} : {1}
'.format(field[1], field[2]) + video[field[0]] = self._html_search_regex(regex, webpage, field[0], flags=DOTALL, default=None) + # Duration + video['duration'] = int_or_none(video['duration'], invscale=60) + # Release date + date = video['release_date'].split(' ') + video["release_date"] = "{0}{1}{2}".format(date[2], month_by_name(unescapeHTML(date[1]).lower(), 'fr'), date[0]) + + return video diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a693f8c56..0fd84f343 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -128,6 +128,7 @@ from .camdemy import ( ) from .camwithher import CamWithHerIE from .canalplus import CanalplusIE +from .canalu import CanalUIE from .canalc2 import Canalc2IE from .canvas import CanvasIE from .carambatv import (