[kissanime] Add new extractors

Adds support for kissanime.com and kisscartoon.me, for both playlists
and videos.
This commit is contained in:
Cheng Sun 2015-02-26 15:00:03 +00:00
parent a0090691d0
commit a3fee30b56
2 changed files with 214 additions and 0 deletions

View File

@ -233,6 +233,12 @@ from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kissanime import (
KissAnimeIE,
KissCartoonIE,
KissAnimePlaylistIE,
KissCartoonPlaylistIE,
)
from .keek import KeekIE from .keek import KeekIE
from .kontrtube import KontrTubeIE from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE

View File

@ -0,0 +1,208 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .youtube import YoutubeIE
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import unescapeHTML
class KissAnimeIE(InfoExtractor):
_VALID_URL_TEMPLATE = r'https?://(?:www\.)?%(host)s/%(type)s/(?P<id>[^/]+/[^/?#]+)'
IE_NAME = 'kissanime'
IE_HOST = 'kissanime.com'
IE_TYPE = 'Anime'
TXHA_BASE64_ENCODED = True
_VALID_URL = _VALID_URL_TEMPLATE % {'host': re.escape(IE_HOST), 'type': re.escape(IE_TYPE)}
_TESTS = [{
'url': 'http://kissanime.com/Anime/Great-Teacher-Onizuka-Sub/Episode-001?id=57217',
'md5': 'c29a73647b075a0dc075485abc197c0b',
'info_dict': {
'id': 'Great-Teacher-Onizuka-Sub/Episode-001',
'ext': 'mp4',
'title': 'Great Teacher Onizuka (Sub) Episode 001',
'description': 'Watch Great Teacher Onizuka (Sub) Episode 001 online in high quality',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20131105',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('http://%(host)s/%(type)s/%(id)s' % {
'host': self.IE_HOST, 'type': self.IE_TYPE, 'id': video_id
}, video_id)
# get metadata
metadata = {}
for metatype in ["name", "description", "thumbnailUrl", "uploadDate"]:
r = r'<meta itemprop="%s" content="([^"]*)"/>' % (metatype)
metadata[metatype] = self._html_search_regex(r, webpage, metatype)
# parse date format YYYY-M-D
upload_date_parts = metadata['uploadDate'].split('-')
assert len(upload_date_parts) == 3
for i, x in enumerate([4, 2, 2]):
assert(len(upload_date_parts[i]) <= x)
upload_date_parts[i] = upload_date_parts[i].zfill(x)
upload_date = ''.join(upload_date_parts)
# get flashvars
if self.TXHA_BASE64_ENCODED:
txha_b64 = self._search_regex(r"var txha = '([A-Za-z0-9+/]+={0,2})';", webpage, 'txha (base64)')
txha = base64.b64decode(txha_b64).decode('ascii')
else:
txha = self._search_regex(r"var txha = '([^']+)';", webpage, 'txha')
flashvars_str = unescapeHTML(txha)
flashvars = compat_parse_qs(flashvars_str)
# get fmt info
fmt_list = [tuple(fmt.split('/')) for fmt in flashvars['fmt_list'][0].split(',')]
fmt_stream_map = dict([fmt.split('|') for fmt in flashvars['fmt_stream_map'][0].split(',')])
formats = []
for (fmt, fmt_res) in fmt_list:
width, height = [int(x) for x in fmt_res.split('x')]
formats.append({
'url': fmt_stream_map[fmt],
'ext': YoutubeIE._formats[fmt]['ext'],
'format_id': fmt,
'width': width,
'height': height,
'resolution': fmt_res
})
self._sort_formats(formats)
return {
'id': video_id,
'title': metadata['name'],
'formats': formats,
'description': metadata['description'],
'thumbnail': metadata['thumbnailUrl'],
'upload_date': upload_date
}
class KissAnimePlaylistIE(InfoExtractor):
_VALID_URL_TEMPLATE = r'https?://(?:www\.)?%(host)s/%(type)s/(?P<id>[^/?#]+)/?(?:[\?#].*)?$'
IE_NAME = 'kissanime:playlist'
IE_HOST = 'kissanime.com'
IE_TYPE = 'Anime'
_VALID_URL = _VALID_URL_TEMPLATE % {'host': re.escape(IE_HOST), 'type': re.escape(IE_TYPE)}
_TESTS = [{
'url': 'http://kissanime.com/Anime/Fairy-Tail',
'info_dict': {
'id': 'Fairy-Tail',
'title': 'Fairy Tail (Sub)',
'description': "re:Set in an.*Guild's master\.\xa0\n\nLucy Heartfilia.*Fairy Tail\.\xa0\n\nOne day,.*Fairy Tail\.",
},
'playlist_count': 175,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage('http://%(host)s/%(type)s/%(id)s' % {
'host': self.IE_HOST, 'type': self.IE_TYPE, 'id': playlist_id
}, playlist_id)
# get metadata
name = self._search_regex(r'<a Class="bigChar" href="[^"]*">([^<]*)</a>',
webpage, 'name')
desc = self._html_search_regex(r'<span class="info">Summary:</span>\n(.*)',
webpage, 'desc', fatal=False)
# get entries
entries = []
for m in re.finditer(r"""<tr>\n
<td>\n
<a\ href="/%(type)s/(?P<id>[^"]*)"\ title="(?P<desc>[^"]*)">\n
(?P<name>[^<]*)</a>\n
</td>\n
<td>\n
(?P<date>[0-9/]+)\n
</td>\n
</tr>\n""" % {'type': re.escape(self.IE_TYPE)},
webpage, re.VERBOSE):
# parse date format M/D/YYYY
# note that this date format is different to the meta tag in the
# video page which KissAnimeIE parses
upload_date_parts = m.group('date').split('/')
assert len(upload_date_parts) == 3
canon_date_parts = [None for i in range(3)]
Y,M,D = enumerate([4,2,2])
for date_part, (i, x) in zip(upload_date_parts, [M,D,Y]):
assert(len(date_part) <= x)
canon_date_parts[i] = date_part.zfill(x)
canon_upload_date = ''.join(canon_date_parts)
entries.append({
'_type': 'url',
'ie_key': 'Kiss%s' % self.IE_TYPE,
'url': "http://%s/%s/%s" % (self.IE_HOST, self.IE_TYPE, m.group('id')),
'description': m.group('desc'),
'title': m.group('name'),
'date': canon_upload_date,
})
# sort into chronological order
entries.reverse()
playlist_info = {
'_type': 'playlist',
'id': playlist_id,
'title': name,
'entries': entries,
}
if desc is not None:
playlist_info['description'] = desc
return playlist_info
class KissCartoonIE(KissAnimeIE):
IE_NAME = 'kisscartoon'
IE_HOST = 'kisscartoon.me'
IE_TYPE = 'Cartoon'
TXHA_BASE64_ENCODED = False
_VALID_URL = KissAnimeIE._VALID_URL_TEMPLATE % {'host': re.escape(IE_HOST), 'type': re.escape(IE_TYPE)}
_TESTS = [{
'url': 'http://kisscartoon.me/Cartoon/Adventure-Time-with-Finn-Jake-Season-01/Episode-001?id=4063',
'md5': '8585377f24b2761db1231d34db5ac1fe',
'info_dict': {
'id': 'Adventure-Time-with-Finn-Jake-Season-01/Episode-001',
'ext': 'mp4',
'title': 'Adventure Time with Finn & Jake Season 01 Episode 001',
'description': 'Watch Adventure Time with Finn & Jake Season 01 Episode 001 online in high quality',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20141105',
}
}]
class KissCartoonPlaylistIE(KissAnimePlaylistIE):
_VALID_URL_TEMPLATE = r'https?://(?:www\.)?%(host)s/%(type)s/(?P<id>[^/]+)'
IE_NAME = 'kisscartoon:playlist'
IE_HOST = 'kisscartoon.me'
IE_TYPE = 'Cartoon'
_VALID_URL = KissAnimePlaylistIE._VALID_URL_TEMPLATE % {'host': re.escape(IE_HOST), 'type': re.escape(IE_TYPE)}
_TESTS = [{
'url': 'http://kisscartoon.me/Cartoon/Archer-Season-02',
'info_dict': {
'id': 'Archer-Season-02',
'title': 'Archer Season 02',
'description': "re:At ISIS.*royally screw each other\.",
},
'playlist_count': 13,
}]