Added new extractors
This commit is contained in:
parent
4d46c1c68c
commit
f9fa66dcac
@ -144,6 +144,10 @@ from .globo import GloboIE
|
||||
from .godtube import GodTubeIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .gogoanime import (
|
||||
GoGoAnimeIE,
|
||||
GoGoAnimeSearchIE
|
||||
)
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
@ -286,6 +290,11 @@ from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .played import PlayedIE
|
||||
from .play44 import (
|
||||
Play44IE,
|
||||
ByZooIE,
|
||||
Video44IE
|
||||
)
|
||||
from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
@ -341,6 +350,10 @@ from .smotri import (
|
||||
from .snotr import SnotrIE
|
||||
from .sockshare import SockshareIE
|
||||
from .sohu import SohuIE
|
||||
from .soulanime import (
|
||||
SoulAnimeWatchingIE,
|
||||
SoulAnimeSeriesIE
|
||||
)
|
||||
from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
@ -429,6 +442,7 @@ from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofun import VideoFunIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
|
109
youtube_dl/extractor/gogoanime.py
Normal file
109
youtube_dl/extractor/gogoanime.py
Normal file
@ -0,0 +1,109 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
get_element_by_attribute,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
||||
class GoGoAnimeIE(InfoExtractor):
    """Extractor for gogoanime.com episode and movie pages.

    The page itself hosts no media: it embeds one or more third-party
    players through ``<iframe>`` elements.  This extractor collects the
    iframe URLs and hands them over to whichever extractor matches them.
    """

    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'

    # Dots are escaped so that only the literal host name matches.
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'

    _NOT_FOUND_REGEX = r'Oops! Page Not Found</font>'
    # NOTE(review): _FILEKEY_REGEX and _POSTCONTENT_REGEX are not referenced
    # anywhere in this class; they are kept verbatim in case other code
    # reads them.
    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
    _TITLE_REGEX = r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>'

    # A single embedded player is followed by </p>, several players are
    # separated by <br />.
    _SINGLEPART_REGEX = r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe></p>'
    _MULTIPART_REGEX = r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />'
    _POSTCONTENT_REGEX = r'<div class="postcontent">(?P<content>(?!</div>)*)</div>'
    _IFRAME_REGEX = r'<iframe[^>]*src=[\'"](h[^\'"]+)[\'"]'

    # Single-video test that was disabled in the original commit (reason
    # not recorded):
    #   'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-episode-12',
    #   'md5': 'd9b511f92ce9348206f8481ba19dc9f1',
    #   'info_dict': {
    #       'id': 'Mahou-Shoujo-Madoka-Magica-12',
    #       'ext': 'flv',
    #       'title': 'Mahou-Shoujo-Madoka-Magica-12',
    #       'description': 'Mahou-Shoujo-Madoka-Magica-12'
    #   }
    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1'
        },
        'playlist_count': 3
    }

    def _real_extract(self, url):
        """Resolve a gogoanime page to its embedded player URL(s).

        Returns a ``url``-type result when the page embeds a single player
        and a playlist result when it embeds several parts.

        Raises ExtractorError when the page reports "not found", when the
        post content or a usable iframe cannot be located, or when the page
        matches neither the single-part nor the multi-part layout.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, "Downloading video page")

        if re.search(self._NOT_FOUND_REGEX, page) is not None:
            raise ExtractorError('Video does not exist', expected=True)

        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)

        content = get_element_by_attribute("class", "postcontent", page)
        if not content:
            # Without this guard a missing element would surface as a
            # TypeError from the regex engine.
            raise ExtractorError('Unable to locate the post content')

        # videofun embeds are skipped, exactly as in the original code.
        # NOTE(review): the reason for skipping them is not documented.
        embed_urls = [
            unescapeHTML(compat_urllib_parse.unquote(src))
            for src in re.findall(self._IFRAME_REGEX, content)
            if 'videofun' not in src
        ]

        if re.search(self._SINGLEPART_REGEX, page):
            if not embed_urls:
                raise ExtractorError('No supported embedded video found')
            return {
                '_type': 'url',
                'id': None,
                'url': embed_urls[0],
                'title': title,
                'description': title
            }

        if re.search(self._MULTIPART_REGEX, page):
            return self.playlist_result(
                [self.url_result(embed_url) for embed_url in embed_urls], video_id)

        # Neither layout matched: fail loudly instead of printing to stdout
        # and returning an empty, unusable result.
        raise ExtractorError('Unable to parse the video page layout')
|
||||
|
||||
|
||||
class GoGoAnimeSearchIE(InfoExtractor):
    """Turn a gogoanime.com search-results URL (``?s=<query>``) into a playlist."""

    IE_NAME = 'gogoanime:search'
    IE_DESC = 'GoGoAnime Search'

    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>.*)'

    # Captures the first link inside each "postlist" result block.
    _POSTLIST_REGEX = r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"'

    _TEST = {
        'url': 'http://www.gogoanime.com/?s=bokusatsu',
        'info_dict': {
            'id': 'bokusatsu'
        },
        'playlist_count': 6
    }

    def _real_extract(self, url):
        """Return a playlist with one URL entry per search hit.

        The playlist id is the search term captured from the URL.
        """
        query = re.match(self._VALID_URL, url).group('id')
        results_page = self._download_webpage(url, query, "Downloading video page")

        entries = []
        for hit in re.finditer(self._POSTLIST_REGEX, results_page):
            entries.append(self.url_result(hit.group('url')))

        return self.playlist_result(entries, query)
|
||||
|
82
youtube_dl/extractor/play44.py
Normal file
82
youtube_dl/extractor/play44.py
Normal file
@ -0,0 +1,82 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse
|
||||
)
|
||||
|
||||
|
||||
class Play44IE(InfoExtractor):
    """Extractor for play44.net ``embed.php`` player pages.

    The player page stores the media location in a ``_url = "...";``
    JavaScript assignment; the title is derived from the file name in that
    URL.  Subclasses reuse ``_real_extract`` and only override the URL
    pattern and test data.
    """

    IE_NAME = 'play44'
    IE_DESC = 'Play44'

    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'

    _VIDEO_URL_REGEX = r'_url = "(https?://[^"]+)";'
    # The trailing dot is escaped so that the title stops at the file
    # extension.  Unescaped, it matched any character, which could chop
    # the last character off an extension-less name or pick text from a
    # query string.
    _TITLE_REGEX = r'.*/(?P<title>[^.]*)\.'

    _TEST = {
        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-07',
            'ext': 'flv',
            'title': 'mahou-shoujo-madoka-magica-07',
            'description': 'mahou-shoujo-madoka-magica-07'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the media referenced by the player page.

        ``id``, ``title`` and ``description`` are all set to the media file
        name without its extension; when no such name can be derived from
        the media URL, the id captured from the page URL is used instead.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, "Downloading video page")

        video_url_encoded = self._html_search_regex(self._VIDEO_URL_REGEX, page, 'url', fatal=True)
        video_url = compat_urllib_parse.unquote(video_url_encoded)

        # Guard against a non-matching URL rather than crashing with an
        # AttributeError on ``None.group``.
        title_match = re.match(self._TITLE_REGEX, video_url)
        title = title_match.group('title') if title_match else video_id

        return {
            'id': title,
            'url': video_url,
            'title': title,
            'description': title
        }
|
||||
|
||||
class ByZooIE(Play44IE):
    """byzoo.org embed pages; extraction logic is inherited from Play44IE."""

    IE_NAME = 'byzoo'
    IE_DESC = 'ByZoo'

    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'

    _TEST = dict(
        url='http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
        md5='455c83dabe2cd9fd74a87612b01fe017',
        info_dict=dict(
            id='mahou_shoujo_madoka_magica_movie_3_-_part1',
            ext='mp4',
            title='mahou_shoujo_madoka_magica_movie_3_-_part1',
            description='mahou_shoujo_madoka_magica_movie_3_-_part1',
        ),
    )
|
||||
|
||||
class Video44IE(Play44IE):
    """video44.net player pages; extraction logic is inherited from Play44IE."""

    IE_NAME = "video44"
    IE_DESC = "Video44"

    # The id is the whole value of the ``file`` query parameter.  The
    # original pattern ``[^&].`` captured exactly two characters ("ch" for
    # the test URL below) instead of everything up to the next ``&``.
    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&]+).*'

    _TEST = {
        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
        'info_dict': {
            'id': 'chaoshead-12',
            'ext': 'mp4',
            'title': 'chaoshead-12',
            'description': 'chaoshead-12'
        }
    }
|
87
youtube_dl/extractor/soulanime.py
Normal file
87
youtube_dl/extractor/soulanime.py
Normal file
@ -0,0 +1,87 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse
|
||||
)
|
||||
|
||||
|
||||
class SoulAnimeBaseIE(InfoExtractor):
    """Shared helper for soul-anime.net extractors.

    Provides ``_down_vid``, which resolves a ``/watching/<id>`` page to the
    direct download link found in its ``<div id="download">`` element.
    """

    _VID_VALID_URL = r'http://[w.]*soul-anime\.net/watching/(?P<id>[^/]+)'

    _VIDEO_URL_REGEX = r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"'

    def _down_vid(self, url):
        """Return the info dict for a single ``/watching/`` page.

        ``id``, ``title`` and ``description`` are all the slug captured from
        the page URL.  ``ext`` is taken from the subtype of the media
        response's ``Content-Type`` header and is left out of the result
        when the server sends no usable header.
        """
        mobj = re.match(self._VID_VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, "Downloading video page")

        # The download link on the page is site-relative.
        video_path = self._html_search_regex(self._VIDEO_URL_REGEX, page, 'url', fatal=True)
        video_url = "http://www.soul-anime.net" + video_path

        # Open the media URL only to inspect its response headers.
        vid = self._request_webpage(video_url, video_id)

        # Use the mapping-style ``headers`` attribute: ``getheader()`` is
        # not provided by urllib2/addinfourl response objects.
        content_type = vid.headers.get('Content-Type') or ''
        # Drop parameters such as "; charset=..." and keep the subtype.
        subtype = content_type.split(';')[0].strip().partition('/')[2]

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_id,
            'description': video_id
        }
        if subtype:
            info['ext'] = subtype
        return info
|
||||
|
||||
|
||||
class SoulAnimeWatchingIE(SoulAnimeBaseIE):
    """Single-episode extractor for soul-anime.net ``/watching/`` pages."""

    IE_NAME = 'soulanime:watching'
    IE_DESC = 'SoulAnime Watching'

    # Same pattern the base class uses to pull the id out of the URL.
    _VALID_URL = SoulAnimeBaseIE._VID_VALID_URL

    _TEST = dict(
        url='http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        md5='05fae04abf72298098b528e98abf4298',
        info_dict=dict(
            id='seirei-tsukai-no-blade-dance-episode-9',
            ext='mp4',
            title='seirei-tsukai-no-blade-dance-episode-9',
            description='seirei-tsukai-no-blade-dance-episode-9',
        ),
    )

    def _real_extract(self, url):
        """Delegate to the shared ``_down_vid`` helper of the base class."""
        info = self._down_vid(url)
        return info
|
||||
|
||||
|
||||
class SoulAnimeSeriesIE(InfoExtractor):
    """Playlist extractor for soul-anime.net series pages."""

    IE_NAME = 'soulanime:series'
    IE_DESC = 'SoulAnime Series'

    _VALID_URL = r'http://[w.]*soul-anime\.net/anime./(?P<id>[^/]+)'

    # Episodes are listed as <option> elements whose value is the
    # site-relative /watching/ path.
    _EPISODE_REGEX = r'<option value="(/watching/[^"]+)">[^<]*</option>'

    _TEST = {
        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
        'info_dict': {
            'id': 'black-rock-shooter-tv'
        },
        'playlist_count': 8
    }

    def _real_extract(self, url):
        """Return a playlist with one URL entry per episode option found.

        The playlist id is the series slug captured from the URL.
        """
        series_id = re.match(self._VALID_URL, url).group('id')
        series_page = self._download_webpage(url, series_id, "Downloading series page")

        entries = []
        for episode_path in re.findall(self._EPISODE_REGEX, series_page):
            entries.append(self.url_result("http://www.soul-anime.net" + episode_path))

        return self.playlist_result(entries, series_id)
|
48
youtube_dl/extractor/videofun.py
Normal file
48
youtube_dl/extractor/videofun.py
Normal file
@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse
|
||||
)
|
||||
|
||||
|
||||
class VideoFunIE(InfoExtractor):
    """Extractor for videofun.me ``/embed/`` player pages.

    The player page references the media through a
    ``url: "http://gateway.videofun.me..."`` entry; the title is derived
    from the file name in that URL.
    """

    IE_NAME = 'videofun'
    IE_DESC = 'VideoFun'

    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>.+)'

    _VIDEO_URL_REGEX = r'url: "(http://gateway\.videofun\.me[^"]+)"'
    # The trailing dot is escaped so that the title stops at the file
    # extension.  Unescaped, it matched any character, which could chop
    # the last character off an extension-less name or pick text from a
    # query string.
    _TITLE_REGEX = r'.*/(?P<title>[^.]*)\.'

    _TEST = {
        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'Mahou-Shoujo-Madoka-Magica-07',
            'ext': 'flv',
            'title': 'Mahou-Shoujo-Madoka-Magica-07',
            'description': 'Mahou-Shoujo-Madoka-Magica-07'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the media referenced by the embed page.

        ``id``, ``title`` and ``description`` are all set to the media file
        name without its extension; when no such name can be derived from
        the media URL, the id captured from the page URL is used instead.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, "Downloading video page")

        video_url_encoded = self._html_search_regex(self._VIDEO_URL_REGEX, page, 'url', fatal=True)
        video_url = compat_urllib_parse.unquote(video_url_encoded)

        # Guard against a non-matching URL rather than crashing with an
        # AttributeError on ``None.group``.
        title_match = re.match(self._TITLE_REGEX, video_url)
        title = title_match.group('title') if title_match else video_id

        return {
            'id': title,
            'url': video_url,
            'title': title,
            'description': title
        }
|
Loading…
x
Reference in New Issue
Block a user