diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f45ce05ab..a2f38f939 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -144,6 +144,10 @@ from .globo import GloboIE from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE +from .gogoanime import ( + GoGoAnimeIE, + GoGoAnimeSearchIE +) from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE @@ -286,6 +290,11 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE from .played import PlayedIE +from .play44 import ( + Play44IE, + ByZooIE, + Video44IE +) from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE @@ -341,6 +350,10 @@ from .smotri import ( from .snotr import SnotrIE from .sockshare import SockshareIE from .sohu import SohuIE +from .soulanime import ( + SoulAnimeWatchingIE, + SoulAnimeSeriesIE +) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, @@ -429,6 +442,7 @@ from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE +from .videofun import VideoFunIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py new file mode 100644 index 000000000..c851cf8dd --- /dev/null +++ b/youtube_dl/extractor/gogoanime.py @@ -0,0 +1,109 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urlparse, + compat_urllib_parse, + get_element_by_attribute, + unescapeHTML +) + + +class GoGoAnimeIE(InfoExtractor): + IE_NAME = 'gogoanime' + IE_DESC = 'GoGoAnime' + + _VALID_URL = r'http://www.gogoanime.com/(?P[A-Za-z0-9-]+)' + + 
_NOT_FOUND_REGEX = r'Oops! Page Not Found' + _FILEKEY_REGEX = r'flashvars\.filekey="(?P[^"]+)";' + _TITLE_REGEX = r'
[^<]*

([^<]+)

' + + _SINGLEPART_REGEX = r'
[^<]*

' + _MULTIPART_REGEX = r'
[^<]*


' + _POSTCONTENT_REGEX = r'

(?P(?!
)*)
' + _IFRAME_REGEX = r']*src=[\'"](h[^\'"]+)[\'"]' + + """_TEST = { + 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-episode-12', + 'md5': 'd9b511f92ce9348206f8481ba19dc9f1', + 'info_dict': { + 'id': 'Mahou-Shoujo-Madoka-Magica-12', + 'ext': 'flv', + 'title': 'Mahou-Shoujo-Madoka-Magica-12', + 'description': 'Mahou-Shoujo-Madoka-Magica-12' + } + },""" + _TEST = { + 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', + 'info_dict': { + 'id': 'mahou-shoujo-madoka-magica-movie-1' + }, + 'playlist_count': 3 + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, "Downloading video page") + + if re.search(self._NOT_FOUND_REGEX, page) is not None: + raise ExtractorError('Video does not exist', expected=True) + + title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) + description = title + + content = get_element_by_attribute("class", "postcontent", page) + + pattern = re.compile(self._IFRAME_REGEX) + vids = pattern.findall(content) + + vids = [unescapeHTML(compat_urllib_parse.unquote(x)) for x in vids if not re.search(".*videofun.*", x)] + + if (re.search(self._SINGLEPART_REGEX, page)): + return { + '_type': 'url', + 'id': None, + 'url': vids[0], + 'title': title, + 'description': title + } + + if (re.search(self._MULTIPART_REGEX, page)): + return self.playlist_result([self.url_result(vid) for vid in vids], video_id) + + print("Error parsing!") + return {} + + +class GoGoAnimeSearchIE(InfoExtractor): + IE_NAME = 'gogoanime:search' + IE_DESC = 'GoGoAnime Search' + + _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P.*)' + + _POSTLIST_REGEX = r'
[^<]*]*>[^<]*.+)' + + _VIDEO_URL_REGEX = r'_url = "(https?://[^"]+)";' + _TITLE_REGEX = r'.*/(?P[^.]*).' + + _TEST = { + 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', + 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', + 'info_dict': { + 'id': 'mahou-shoujo-madoka-magica-07', + 'ext': 'flv', + 'title': 'mahou-shoujo-madoka-magica-07', + 'description': 'mahou-shoujo-madoka-magica-07' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, "Downloading video page") + + video_url_encoded = self._html_search_regex(self._VIDEO_URL_REGEX, page, 'url', fatal=True) + video_url = compat_urllib_parse.unquote(video_url_encoded) + + title = re.match(self._TITLE_REGEX, video_url).group('title') + + return { + 'id': title, + 'url': video_url, + 'title': title, + 'description': title + } + +class ByZooIE(Play44IE): + IE_NAME = "byzoo" + IE_DESC = "ByZoo" + + _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' + + _TEST = { + 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', + 'md5': '455c83dabe2cd9fd74a87612b01fe017', + 'info_dict': { + 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', + 'ext': 'mp4', + 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', + 'description': 'mahou_shoujo_madoka_magica_movie_3_-_part1' + } + } + +class Video44IE(Play44IE): + IE_NAME = "video44" + IE_DESC = "Video44" + + _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' + + _TEST = { + 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', + 'md5': '43eaec6d0beb10e8d42459b9f108aff3', + 'info_dict': { + 'id': 'chaoshead-12', + 'ext': 'mp4', + 'title': 'chaoshead-12', + 'description': 'chaoshead-12' + } + } diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..27cfccd7e --- /dev/null +++ 
b/youtube_dl/extractor/soulanime.py
@@ -0,0 +1,109 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse
+)
+
+
+class SoulAnimeBaseIE(InfoExtractor):
+    """Constants and the episode helper shared by the soul-anime.net extractors."""
+
+    _BASE_URL = 'http://www.soul-anime.net'
+
+    # "(?:www\.)?" replaces "[w.]*", which accepted any run of "w" and "."
+    # in front of the host; the id now stops at "?" and "#" as well as "/".
+    _VID_VALID_URL = r'https?://(?:www\.)?soul-anime\.net/watching/(?P<id>[^/?#]+)'
+
+    # The captured href is appended to _BASE_URL, so it is expected to be a
+    # site-relative path.
+    _VIDEO_URL_REGEX = r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"'
+
+    def _down_vid(self, url):
+        """Build the info dict for the episode page at *url*.
+
+        The episode slug from the URL is used as id, title and description.
+        'ext' is included only when the media URL answers with a
+        "type/subtype" Content-Type header.
+        """
+        video_id = re.match(self._VID_VALID_URL, url).group('id')
+
+        page = self._download_webpage(url, video_id, 'Downloading video page')
+
+        video_path = self._html_search_regex(
+            self._VIDEO_URL_REGEX, page, 'url', fatal=True)
+        video_url = self._BASE_URL + video_path
+
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'title': video_id,
+            'description': video_id,
+        }
+
+        # headers.get() exists on both Python 2 and 3 response objects, unlike
+        # getheader(). Parameters such as "; charset=..." are cut off, and a
+        # missing or malformed header simply leaves 'ext' unset instead of
+        # raising.
+        urlh = self._request_webpage(video_url, video_id)
+        content_type = urlh.headers.get('Content-Type') or ''
+        subtype = content_type.split(';')[0].strip().partition('/')[2]
+        if subtype:
+            info['ext'] = subtype
+
+        return info
+
+
+class SoulAnimeWatchingIE(SoulAnimeBaseIE):
+    IE_NAME = 'soulanime:watching'
+    IE_DESC = 'SoulAnime Watching'
+
+    _VALID_URL = SoulAnimeBaseIE._VID_VALID_URL
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+        'md5': '05fae04abf72298098b528e98abf4298',
+        'info_dict': {
+            'id': 'seirei-tsukai-no-blade-dance-episode-9',
+            'ext': 'mp4',
+            'title': 'seirei-tsukai-no-blade-dance-episode-9',
+            'description': 'seirei-tsukai-no-blade-dance-episode-9'
+        }
+    }
+
+    def _real_extract(self, url):
+        return self._down_vid(url)
+
+
+class SoulAnimeSeriesIE(SoulAnimeBaseIE):
+    """Playlist of every episode linked from a series page."""
+
+    IE_NAME = 'soulanime:series'
+    IE_DESC = 'SoulAnime Series'
+
+    _VALID_URL = r'https?://(?:www\.)?soul-anime\.net/anime./(?P<id>[^/?#]+)'
+
+    _EPISODE_REGEX = r'<option value="(/watching/[^"]+)">[^<]*</option>'
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+        'info_dict': {
+            'id': 'black-rock-shooter-tv'
+        },
+        'playlist_count': 8
+    }
+
+    def _real_extract(self, url):
+        series_id = re.match(self._VALID_URL, url).group('id')
+
+        page = self._download_webpage(url, series_id, 'Downloading series page')
+
+        # Each <option> value is a site-relative "/watching/..." path.
+        entries = [
+            self.url_result(self._BASE_URL + episode_path)
+            for episode_path in re.findall(self._EPISODE_REGEX, page)]
+
+        return self.playlist_result(entries, series_id)
diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py
new file mode 100644
index 000000000..f92ea5005
--- /dev/null
+++ b/youtube_dl/extractor/videofun.py
@@ -0,0 +1,59 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse
+)
+
+
+class VideoFunIE(InfoExtractor):
+    IE_NAME = 'videofun'
+    IE_DESC = 'VideoFun'
+
+    # "(?:www\.)?" replaces "[w.]*", and the id stops before "?"/"#" so the
+    # "?w=600&h=438" query of an embed URL is no longer captured as the id.
+    _VALID_URL = r'https?://(?:www\.)?videofun\.me/embed/(?P<id>[^/?#]+)'
+
+    _VIDEO_URL_REGEX = r'url: "(http://gateway\.videofun\.me[^"]+)"'
+    # Captures the text in front of a dot in the media URL, normally the file
+    # name after the last "/". The final dot is escaped; unescaped it matched
+    # any character.
+    _TITLE_REGEX = r'.*/(?P<title>[^.]*)\.'
+
+    _TEST = {
+        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'Mahou-Shoujo-Madoka-Magica-07',
+            'ext': 'flv',
+            'title': 'Mahou-Shoujo-Madoka-Magica-07',
+            'description': 'Mahou-Shoujo-Madoka-Magica-07'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Resolve an embed page to its media URL.
+
+        id, title and description are all the name matched by _TITLE_REGEX in
+        the media URL; the embed id from *url* is used when nothing matches.
+        """
+        embed_id = re.match(self._VALID_URL, url).group('id')
+
+        page = self._download_webpage(url, embed_id, 'Downloading video page')
+
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            self._VIDEO_URL_REGEX, page, 'url', fatal=True))
+
+        # re.match() returns None on a URL without the expected shape, and the
+        # group may be empty; fall back to the embed id in both cases rather
+        # than fail on None.group() or return an empty id.
+        title_mobj = re.match(self._TITLE_REGEX, video_url)
+        title = (title_mobj.group('title') if title_mobj else None) or embed_id
+
+        return {
+            'id': title,
+            'url': video_url,
+            'title': title,
+            'description': title
+        }