Fix tests and rely on _match_id for some extractors

This commit is contained in:
Déstin Reed 2016-09-29 16:20:50 +02:00
parent 8f0cf20ab9
commit f04a83da42
13 changed files with 40 additions and 68 deletions

View File

@ -26,9 +26,7 @@ class AnySexIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -8,7 +7,7 @@ from ..utils import ExtractorError
class BYUtvIE(InfoExtractor): class BYUtvIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<id>[^/?#]+)'
_TEST = { _TEST = {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'md5': '05850eb8c749e2ee05ad5a1c34668493', 'md5': '05850eb8c749e2ee05ad5a1c34668493',
@ -27,15 +26,14 @@ class BYUtvIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
episode_code = self._search_regex( episode_code = self._search_regex(
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
episode_json = re.sub( ep = self._parse_json(re.sub(
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"',
ep = json.loads(episode_json) episode_code), video_id)
if ep['providerType'] == 'Ooyala': if ep['providerType'] == 'Ooyala':
return { return {

View File

@ -1,9 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
player_page = self._download_webpage(player_url, video_id) player_page = self._download_webpage(player_url, video_id)
config_json = self._search_regex( config = self._parse_json(self._search_regex(
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
'configuration') 'configuration'), video_id)
config = json.loads(config_json)
video_info = config['videoInfo'] video_info = config['videoInfo']
sources = config['sources'] sources = config['sources']

View File

@ -1,8 +1,6 @@
# -*- coding: utf-8 -*- # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -20,16 +18,15 @@ class CriterionIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
final_url = self._search_regex( final_url = self._search_regex(
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url') webpage, 'thumbnail url')
return { return {

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .zdf import ZDFIE from .zdf import ZDFIE
@ -32,7 +30,6 @@ class DreiSatIE(ZDFIE):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
return self.extract_from_xml_url(video_id, details_url) return self.extract_from_xml_url(video_id, details_url)

View File

@ -26,8 +26,7 @@ class DropboxIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
fn = compat_urllib_parse_unquote(url_basename(url)) fn = compat_urllib_parse_unquote(url_basename(url))
title = os.path.splitext(fn)[0] title = os.path.splitext(fn)[0]
video_url = re.sub(r'[?&]dl=0', '', url) video_url = re.sub(r'[?&]dl=0', '', url)

View File

@ -20,8 +20,8 @@ class FreesoundIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) music_id = self._match_id(url)
music_id = mobj.group('id')
webpage = self._download_webpage(url, music_id) webpage = self._download_webpage(url, music_id)
title = self._html_search_regex( title = self._html_search_regex(
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>', r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',

View File

@ -1,8 +1,6 @@
# encoding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -19,9 +17,7 @@ class InaIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
info_doc = self._download_xml(mrss_url, video_id) info_doc = self._download_xml(mrss_url, video_id)

View File

@ -1,14 +1,11 @@
# -*- coding: utf-8 -*- # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class MoviezineIE(InfoExtractor): class MoviezineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
_TEST = { _TEST = {
'url': 'http://www.moviezine.se/video/205866', 'url': 'http://www.moviezine.se/video/205866',
'info_dict': { 'info_dict': {
@ -21,8 +18,7 @@ class MoviezineIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import str_or_none from ..utils import str_or_none
@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'md5': '3da12ebca28c67c111a7f8b262d3f7a7', 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
'info_dict': { 'info_dict': {
'id': '16965047', 'id': '16965047',
'ext': 'mp3', 'ext': 'mp3',
'title': 'MONA LISA', 'title': 'MONA LISA',
'uploader': 'ALKILADOS', 'uploader': 'ALKILADOS',
'uploader_id': '216429', 'uploader_id': '216429',
'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' 'thumbnail': 're:^https?://.*\.jpg',
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) song_id = self._match_id(url)
song_id = mobj.group('id')
api_res = self._download_json( api_res = self._download_json(
'https://api.reverbnation.com/song/%s' % song_id, 'https://api.reverbnation.com/song/%s' % song_id,
@ -31,14 +28,20 @@ class ReverbNationIE(InfoExtractor):
note='Downloading information of song %s' % song_id note='Downloading information of song %s' % song_id
) )
thumbnails = [{
'url': api_res.get('image'),
}, {
'url': api_res.get('thumbnail'),
'preference': -2,
}]
return { return {
'id': song_id, 'id': song_id,
'title': api_res.get('name'), 'title': api_res['name'],
'url': api_res.get('url'), 'url': api_res['url'],
'uploader': api_res.get('artist', {}).get('name'), 'uploader': api_res.get('artist', {}).get('name'),
'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
'thumbnail': self._proto_relative_url( 'thumbnails': thumbnails,
api_res.get('image', api_res.get('thumbnail'))),
'ext': 'mp3', 'ext': 'mp3',
'vcodec': 'none', 'vcodec': 'none',
} }

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
_TEST = { _TEST = {
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '0cf531ae8006b530bd9df947a6a0df77', 'md5': '868309628ba00fd488cf516a113fd717',
'info_dict': { 'info_dict': {
'id': 'TD73btpBqSxc', 'id': 'TD73btpBqSxc',
'ext': 'mp4', 'ext': 'mp4',
@ -20,8 +18,7 @@ class SlutloadIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

View File

@ -4,7 +4,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
get_element_by_attribute, get_element_by_class,
clean_html, clean_html,
) )
@ -41,15 +41,14 @@ class TechTalksIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) talk_id = self._match_id(url)
talk_id = mobj.group('id')
webpage = self._download_webpage(url, talk_id) webpage = self._download_webpage(url, talk_id)
rtmp_url = self._search_regex( rtmp_url = self._search_regex(
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
play_path = self._search_regex( play_path = self._search_regex(
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, 'presenter play path') webpage, 'presenter play path')
title = clean_html(get_element_by_attribute('class', 'title', webpage)) title = clean_html(get_element_by_class('title', webpage))
video_info = { video_info = {
'id': talk_id, 'id': talk_id,
'title': title, 'title': title,

View File

@ -8,7 +8,6 @@ from ..utils import qualities
class UnistraIE(InfoExtractor): class UnistraIE(InfoExtractor):
_VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)' _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://utv.unistra.fr/video.php?id_video=154', 'url': 'http://utv.unistra.fr/video.php?id_video=154',
@ -33,9 +32,7 @@ class UnistraIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage)) files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))