Fix tests and rely on _match_id for some extractors

2016-09-29 16:20:50 +02:00 · 2016-09-29 16:20:50 +02:00 · f04a83da42
commit f04a83da42
parent 8f0cf20ab9
13 changed files with 40 additions and 68 deletions
--- a/youtube_dl/extractor/anysex.py
+++ b/youtube_dl/extractor/anysex.py
@@ -26,9 +26,7 @@ class AnySexIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
@@ -8,7 +7,7 @@ from ..utils import ExtractorError


 class BYUtvIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
+    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
        'md5': '05850eb8c749e2ee05ad5a1c34668493',
@@ -27,15 +26,14 @@ class BYUtvIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
+        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        episode_code = self._search_regex(
            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
-        episode_json = re.sub(
-            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
-        ep = json.loads(episode_json)
+        ep = self._parse_json(re.sub(
+            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"',
+            episode_code), video_id)

        if ep['providerType'] == 'Ooyala':
            return {
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -1,9 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
        player_page = self._download_webpage(player_url, video_id)

-        config_json = self._search_regex(
+        config = self._parse_json(self._search_regex(
            r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
-            'configuration')
-        config = json.loads(config_json)
+            'configuration'), video_id)

        video_info = config['videoInfo']
        sources = config['sources']
--- a/youtube_dl/extractor/criterion.py
+++ b/youtube_dl/extractor/criterion.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -20,16 +18,15 @@ class CriterionIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        final_url = self._search_regex(
-            r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
        title = self._og_search_title(webpage)
        description = self._html_search_meta('description', webpage)
        thumbnail = self._search_regex(
-            r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
            webpage, 'thumbnail url')

        return {
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .zdf import ZDFIE


@@ -32,7 +30,6 @@ class DreiSatIE(ZDFIE):
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        return self.extract_from_xml_url(video_id, details_url)
--- a/youtube_dl/extractor/dropbox.py
+++ b/youtube_dl/extractor/dropbox.py
@@ -26,8 +26,7 @@ class DropboxIE(InfoExtractor):
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        fn = compat_urllib_parse_unquote(url_basename(url))
        title = os.path.splitext(fn)[0]
        video_url = re.sub(r'[?&]dl=0', '', url)
--- a/youtube_dl/extractor/freesound.py
+++ b/youtube_dl/extractor/freesound.py
@@ -20,8 +20,8 @@ class FreesoundIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        music_id = mobj.group('id')
+        music_id = self._match_id(url)
+
        webpage = self._download_webpage(url, music_id)
        title = self._html_search_regex(
            r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -1,8 +1,6 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -19,9 +17,7 @@ class InaIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        info_doc = self._download_xml(mrss_url, video_id)

--- a/youtube_dl/extractor/moviezine.py
+++ b/youtube_dl/extractor/moviezine.py
@@ -1,14 +1,11 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


 class MoviezineIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
-
    _TEST = {
        'url': 'http://www.moviezine.se/video/205866',
        'info_dict': {
@@ -21,8 +18,7 @@ class MoviezineIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import str_or_none

@@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
        'info_dict': {
            'id': '16965047',
            'ext': 'mp3',
            'title': 'MONA LISA',
            'uploader': 'ALKILADOS',
            'uploader_id': '216429',
-            'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
+            'thumbnail': 're:^https?://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        song_id = mobj.group('id')
+        song_id = self._match_id(url)

        api_res = self._download_json(
            'https://api.reverbnation.com/song/%s' % song_id,
@@ -31,14 +28,20 @@ class ReverbNationIE(InfoExtractor):
            note='Downloading information of song %s' % song_id
        )

+        thumbnails = [{
+            'url': api_res.get('image'),
+        }, {
+            'url': api_res.get('thumbnail'),
+            'preference': -2,
+        }]
+
        return {
            'id': song_id,
-            'title': api_res.get('name'),
-            'url': api_res.get('url'),
+            'title': api_res['name'],
+            'url': api_res['url'],
            'uploader': api_res.get('artist', {}).get('name'),
            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
-            'thumbnail': self._proto_relative_url(
-                api_res.get('image', api_res.get('thumbnail'))),
+            'thumbnails': thumbnails,
            'ext': 'mp3',
            'vcodec': 'none',
        }
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
    _TEST = {
        'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
-        'md5': '0cf531ae8006b530bd9df947a6a0df77',
+        'md5': '868309628ba00fd488cf516a113fd717',
        'info_dict': {
            'id': 'TD73btpBqSxc',
            'ext': 'mp4',
@@ -20,8 +18,7 @@ class SlutloadIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -4,7 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    get_element_by_attribute,
+    get_element_by_class,
    clean_html,
 )

@@ -41,15 +41,14 @@ class TechTalksIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        talk_id = mobj.group('id')
+        talk_id = self._match_id(url)
        webpage = self._download_webpage(url, talk_id)
        rtmp_url = self._search_regex(
            r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
        play_path = self._search_regex(
            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
            webpage, 'presenter play path')
-        title = clean_html(get_element_by_attribute('class', 'title', webpage))
+        title = clean_html(get_element_by_class('title', webpage))
        video_info = {
            'id': talk_id,
            'title': title,
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -8,7 +8,6 @@ from ..utils import qualities

 class UnistraIE(InfoExtractor):
    _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
-
    _TESTS = [
        {
            'url': 'http://utv.unistra.fr/video.php?id_video=154',
@@ -33,9 +32,7 @@ class UnistraIE(InfoExtractor):
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))