[arret-sur-images] Add new extractor

2017-03-07 09:31:18 +01:00 · 2017-03-07 09:31:18 +01:00 · c05a44d186
commit c05a44d186
parent e30ccf7047
2 changed files with 72 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -69,6 +69,7 @@ from .arte import (
    TheOperaPlatformIE,
    ArteTVPlaylistIE,
 )
 from .loubiana import ArretSurImagesIE
 from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
 from .audimedia import AudiMediaIE
--- a/youtube_dl/extractor/loubiana.py
+++ b/youtube_dl/extractor/loubiana.py
@@ -0,0 +1,71 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    sanitized_Request,
    urlencode_postdata,
 )
 class ArretSurImagesIE(InfoExtractor):
    """Extractor for programme pages ("emissions") on arretsurimages.net.

    Extraction follows two links: the programme page links to a download
    page on the ``v42`` host, and that page links to the actual MP4 file.
    If credentials are available, a login POST is sent before extraction.
    """

    # The numeric id is the run of digits following the first "-id" in the
    # programme slug.
    _VALID_URL = r'https?://(?:www\.)?arretsurimages\.net/emissions/.*?-id(?P<id>[0-9]+)'
    _LOGIN_URL = 'https://www.arretsurimages.net/forum/login.php'
    _TEST = {
        'url': 'https://www.arretsurimages.net/emissions/2017-02-17/Theo-La-matraque-telescopique-beaucoup-de-collegues-l-ont-demandee-id9557',
        'md5': '650d2102dad67b2b6a94ac9c063f6d5b',
        'info_dict': {
            'id': '9557',
            'ext': 'mp4',
            'title': 'Théo : "La matraque télescopique, beaucoup de collègues l\'ont demandée"',
        },
        'skip': 'Requires account credentials',
    }

    def _real_initialize(self):
        """Attempt to log in; the result of the attempt is not inspected."""
        self._login()

    def _login(self):
        """Post the login form with the credentials from _get_login_info().

        Returns None when no username is available (no request is made),
        False when the response contains the site's "unknown or inactive
        username / password" message (a warning is reported), and True
        otherwise.
        """
        username, password = self._get_login_info()
        if username is None:
            return None

        login_data = urlencode_postdata({
            'ok': 'Valider',
            'username': username,
            'password': password,
        })

        login_results = self._download_webpage(
            sanitized_Request(self._LOGIN_URL, login_data),
            None, note='Logging in', errnote='Unable to log in')

        # The dot after "inactif" is escaped so it matches a literal full
        # stop rather than any character.
        if re.search(
                r'(?i)Ce nom d\'utilisateur / mot de passe est introuvable ou inactif\. Recommencez',
                login_results):
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        """Return an info dict with 'id', 'title' and 'url' for *url*.

        Each lookup below uses _html_search_regex without a default, so a
        missing element is reported by that helper.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<h1 itemprop="headline">(.*?)</h1>',
            webpage, 'title')

        # Step 1: the programme page links to an intermediate download page.
        # Dots are escaped and the capture is limited to the href value so
        # the pattern cannot run past the closing quote; both http and https
        # links are accepted.
        download_url = self._html_search_regex(
            r'<a href="(https?://v42\.arretsurimages\.net/telecharger/[^"]+\.mp4)" target="_blank" class="bouton-telecharger"></a>',
            webpage, 'download information')

        download_page = self._download_webpage(
            download_url, video_id, 'Downloading download information page')

        # Step 2: the download page carries the direct link to the MP4 file.
        video_url = self._html_search_regex(
            r'<a id="file" href="(https?://v42\.arretsurimages\.net/fichiers/[^"]+\.mp4)" download>suivre ce lien</a>',
            download_page, 'video url')

        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
        }