Fix tests and rely on _match_id for some extractors
This commit is contained in:
parent
8f0cf20ab9
commit
f04a83da42
@ -26,9 +26,7 @@ class AnySexIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
|
video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -8,7 +7,7 @@ from ..utils import ExtractorError
|
|||||||
|
|
||||||
|
|
||||||
class BYUtvIE(InfoExtractor):
|
class BYUtvIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||||
@ -27,15 +26,14 @@ class BYUtvIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('video_id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
episode_code = self._search_regex(
|
episode_code = self._search_regex(
|
||||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||||
episode_json = re.sub(
|
ep = self._parse_json(re.sub(
|
||||||
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
|
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"',
|
||||||
ep = json.loads(episode_json)
|
episode_code), video_id)
|
||||||
|
|
||||||
if ep['providerType'] == 'Ooyala':
|
if ep['providerType'] == 'Ooyala':
|
||||||
return {
|
return {
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
||||||
player_page = self._download_webpage(player_url, video_id)
|
player_page = self._download_webpage(player_url, video_id)
|
||||||
|
|
||||||
config_json = self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
||||||
'configuration')
|
'configuration'), video_id)
|
||||||
config = json.loads(config_json)
|
|
||||||
|
|
||||||
video_info = config['videoInfo']
|
video_info = config['videoInfo']
|
||||||
sources = config['sources']
|
sources = config['sources']
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -20,16 +18,15 @@ class CriterionIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
final_url = self._search_regex(
|
final_url = self._search_regex(
|
||||||
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||||
webpage, 'thumbnail url')
|
webpage, 'thumbnail url')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
|
||||||
@ -32,7 +30,6 @@ class DreiSatIE(ZDFIE):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
return self.extract_from_xml_url(video_id, details_url)
|
return self.extract_from_xml_url(video_id, details_url)
|
||||||
|
@ -26,8 +26,7 @@ class DropboxIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
fn = compat_urllib_parse_unquote(url_basename(url))
|
fn = compat_urllib_parse_unquote(url_basename(url))
|
||||||
title = os.path.splitext(fn)[0]
|
title = os.path.splitext(fn)[0]
|
||||||
video_url = re.sub(r'[?&]dl=0', '', url)
|
video_url = re.sub(r'[?&]dl=0', '', url)
|
||||||
|
@ -20,8 +20,8 @@ class FreesoundIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
music_id = self._match_id(url)
|
||||||
music_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, music_id)
|
webpage = self._download_webpage(url, music_id)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# encoding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -19,9 +17,7 @@ class InaIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
|
mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
|
||||||
info_doc = self._download_xml(mrss_url, video_id)
|
info_doc = self._download_xml(mrss_url, video_id)
|
||||||
|
|
||||||
|
@ -1,14 +1,11 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class MoviezineIE(InfoExtractor):
|
class MoviezineIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.moviezine.se/video/205866',
|
'url': 'http://www.moviezine.se/video/205866',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -21,8 +18,7 @@ class MoviezineIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
|
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import str_or_none
|
from ..utils import str_or_none
|
||||||
|
|
||||||
@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||||
'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
|
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '16965047',
|
'id': '16965047',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'MONA LISA',
|
'title': 'MONA LISA',
|
||||||
'uploader': 'ALKILADOS',
|
'uploader': 'ALKILADOS',
|
||||||
'uploader_id': '216429',
|
'uploader_id': '216429',
|
||||||
'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
song_id = self._match_id(url)
|
||||||
song_id = mobj.group('id')
|
|
||||||
|
|
||||||
api_res = self._download_json(
|
api_res = self._download_json(
|
||||||
'https://api.reverbnation.com/song/%s' % song_id,
|
'https://api.reverbnation.com/song/%s' % song_id,
|
||||||
@ -31,14 +28,20 @@ class ReverbNationIE(InfoExtractor):
|
|||||||
note='Downloading information of song %s' % song_id
|
note='Downloading information of song %s' % song_id
|
||||||
)
|
)
|
||||||
|
|
||||||
|
thumbnails = [{
|
||||||
|
'url': api_res.get('image'),
|
||||||
|
}, {
|
||||||
|
'url': api_res.get('thumbnail'),
|
||||||
|
'preference': -2,
|
||||||
|
}]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': song_id,
|
'id': song_id,
|
||||||
'title': api_res.get('name'),
|
'title': api_res['name'],
|
||||||
'url': api_res.get('url'),
|
'url': api_res['url'],
|
||||||
'uploader': api_res.get('artist', {}).get('name'),
|
'uploader': api_res.get('artist', {}).get('name'),
|
||||||
'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
|
'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
|
||||||
'thumbnail': self._proto_relative_url(
|
'thumbnails': thumbnails,
|
||||||
api_res.get('image', api_res.get('thumbnail'))),
|
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
||||||
'md5': '0cf531ae8006b530bd9df947a6a0df77',
|
'md5': '868309628ba00fd488cf516a113fd717',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'TD73btpBqSxc',
|
'id': 'TD73btpBqSxc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -20,8 +18,7 @@ class SlutloadIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
get_element_by_attribute,
|
get_element_by_class,
|
||||||
clean_html,
|
clean_html,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -41,15 +41,14 @@ class TechTalksIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
talk_id = self._match_id(url)
|
||||||
talk_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, talk_id)
|
webpage = self._download_webpage(url, talk_id)
|
||||||
rtmp_url = self._search_regex(
|
rtmp_url = self._search_regex(
|
||||||
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
|
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
|
||||||
play_path = self._search_regex(
|
play_path = self._search_regex(
|
||||||
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||||
webpage, 'presenter play path')
|
webpage, 'presenter play path')
|
||||||
title = clean_html(get_element_by_attribute('class', 'title', webpage))
|
title = clean_html(get_element_by_class('title', webpage))
|
||||||
video_info = {
|
video_info = {
|
||||||
'id': talk_id,
|
'id': talk_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -8,7 +8,6 @@ from ..utils import qualities
|
|||||||
|
|
||||||
class UnistraIE(InfoExtractor):
|
class UnistraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
|
_VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://utv.unistra.fr/video.php?id_video=154',
|
'url': 'http://utv.unistra.fr/video.php?id_video=154',
|
||||||
@ -33,9 +32,7 @@ class UnistraIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
|
files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user