[Iwara] Add new metadata extractors

This commit is contained in:
AlexAplin 2018-06-14 23:22:49 -04:00
parent 9b0b627534
commit d26a533272

View File

@ -5,8 +5,13 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse from ..compat import compat_urllib_parse_urlparse
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
str_to_int,
mimetype2ext, mimetype2ext,
remove_end, remove_end,
clean_html,
get_element_by_class,
get_element_by_id,
unified_strdate,
) )
@ -20,6 +25,12 @@ class IwaraIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '【MMD R-18】ガールフレンド carry_me_off', 'title': '【MMD R-18】ガールフレンド carry_me_off',
'age_limit': 18, 'age_limit': 18,
'upload_date': '20150828',
'uploader': 'Reimu丨Action',
'description': '禁止转载\n=acfun=\n=bilibili=\n=youtube=\n=stage=\n=motion=\n=camera=\n=dress=',
'comment_count': int,
'like_count': int,
'view_count': int,
}, },
}, { }, {
'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO', 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
@ -71,6 +82,23 @@ class IwaraIE(InfoExtractor):
title = remove_end(self._html_search_regex( title = remove_end(self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara') r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
upload_date = unified_strdate(self._html_search_regex(
r'作成日:(\d{4}-\d{2}-\d{2})', webpage, 'upload_date', fatal=False))
uploader = get_element_by_class('username', webpage)
description = clean_html(get_element_by_class('field-type-text-with-summary', webpage))
comments_id = get_element_by_id('comments', webpage)
comment_count = int_or_none(self._html_search_regex(
r'([\d,]+)', get_element_by_class('title', comments_id), 'comment_count', fatal=False))
node_views_class = get_element_by_class('node-views', webpage)
like_count = str_to_int(self._html_search_regex(
r'glyphicon-heart[^>]+><\/i>\s+([\d,]+)', node_views_class, 'like_count', fatal=False))
view_count = str_to_int(self._html_search_regex(
r'glyphicon-eye-open[^>]+><\/i>\s+([\d,]+)', node_views_class, 'view_count', fatal=False))
formats = [] formats = []
for a_format in video_data: for a_format in video_data:
format_id = a_format.get('resolution') format_id = a_format.get('resolution')
@ -92,4 +120,10 @@ class IwaraIE(InfoExtractor):
'title': title, 'title': title,
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
'upload_date': upload_date,
'uploader': uploader,
'description': description,
'comment_count': comment_count,
'like_count': like_count,
'view_count': view_count,
} }