[arret-sur-images] Add new extractor

2017-03-07 09:31:18 +01:00 · 2017-03-07 09:31:18 +01:00 · c05a44d186
commit c05a44d186
parent e30ccf7047
2 changed files with 72 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -69,6 +69,7 @@ from .arte import (
    TheOperaPlatformIE,
    ArteTVPlaylistIE,
 )
+from .loubiana import ArretSurImagesIE
 from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
 from .audimedia import AudiMediaIE
--- a/youtube_dl/extractor/loubiana.py
+++ b/youtube_dl/extractor/loubiana.py
@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    sanitized_Request,
+    urlencode_postdata,
+)
+
+
+class ArretSurImagesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?arretsurimages\.net/emissions/.*?-id(?P<id>[0-9]+)'
+    _LOGIN_URL = 'https://www.arretsurimages.net/forum/login.php'
+    _TEST = {
+        'url': 'https://www.arretsurimages.net/emissions/2017-02-17/Theo-La-matraque-telescopique-beaucoup-de-collegues-l-ont-demandee-id9557',
+        'md5': '650d2102dad67b2b6a94ac9c063f6d5b',
+        'info_dict': {
+            'id': '9557',
+            'ext': 'mp4',
+            'title': 'Théo : "La matraque télescopique, beaucoup de collègues l\'ont demandée"',
+        },
+        'skip': 'Requires account credentials',
+    }
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_data = urlencode_postdata({
+            'ok': 'Valider',
+            'username': username,
+            'password': password,
+        });
+
+        login_results = self._download_webpage(
+            sanitized_Request(self._LOGIN_URL, login_data),
+            None, note='Logging in', errnote='Unable to log in')
+
+        if re.search(r'(?i)Ce nom d\'utilisateur / mot de passe est introuvable ou inactif. Recommencez', login_results) is not None:
+            self._downloader.report_warning('unable to log in: bad username or password')
+            return False
+
+        return True
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        video_title = self._html_search_regex(
+            r'<h1 itemprop="headline">(.*?)</h1>',
+            webpage, 'title')
+        download_url = self._html_search_regex(
+            r'<a href="(http://v42.arretsurimages.net/telecharger/.*?.mp4)" target="_blank" class="bouton-telecharger"></a>',
+            webpage, 'download information')
+
+        download_page = self._download_webpage(download_url, video_id, 'Downloading download information page')
+        video_url = self._html_search_regex(
+            r'<a id="file" href="(http://v42.arretsurimages.net/fichiers/.*?.mp4)" download>suivre ce lien</a>',
+            download_page, 'video url')
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+        }