[vshare] extractor rewritten

This commit is contained in:
Timendum 2017-11-13 12:23:32 +01:00
parent 55c727a547
commit d5000890aa
2 changed files with 47 additions and 8 deletions

View File

@ -102,6 +102,7 @@ from .joj import JojIE
from .megaphone import MegaphoneIE from .megaphone import MegaphoneIE
from .vzaar import VzaarIE from .vzaar import VzaarIE
from .channel9 import Channel9IE from .channel9 import Channel9IE
from .vshare import VShareIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1921,6 +1922,16 @@ class GenericIE(InfoExtractor):
'title': 'Rescue Kit 14 Free Edition - Getting started', 'title': 'Rescue Kit 14 Free Edition - Getting started',
}, },
'playlist_count': 4, 'playlist_count': 4,
},
{
# vshare embed
'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
'md5': '17b39f55b5497ae8b59f5fbce8e35886',
'info_dict': {
'id': '0f64ce6',
'title': 'vl14062007715967',
'ext': 'mp4',
}
} }
# { # {
# # TODO: find another test # # TODO: find another test
@ -2879,6 +2890,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
channel9_urls, video_id, video_title, ie=Channel9IE.ie_key()) channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
vshare_urls = VShareIE._extract_urls(webpage)
if vshare_urls:
return self.playlist_from_matches(
vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
def merge_dicts(dict1, dict2): def merge_dicts(dict1, dict2):
merged = {} merged = {}
for k, v in dict1.items(): for k, v in dict1.items():

View File

@ -1,14 +1,21 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_chr,
)
from ..utils import (
decode_packed_codes,
)
class VShareIE(InfoExtractor): class VShareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://vshare.io/d/0f64ce6', 'url': 'https://vshare.io/d/0f64ce6',
'md5': '16d7b8fef58846db47419199ff1ab3e7', 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
'info_dict': { 'info_dict': {
'id': '0f64ce6', 'id': '0f64ce6',
'title': 'vl14062007715967', 'title': 'vl14062007715967',
@ -19,20 +26,36 @@ class VShareIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_packed(self, webpage):
    """Decode the obfuscated markup hidden in a packed script of *webpage*.

    The page is expected to contain an ``eval(function...`` packed script.
    After unpacking it with ``decode_packed_codes`` the script holds a
    bracketed, comma-separated list of integers and a ``fromCharCode(...)``
    expression that ends in a numeric key.  Each integer minus that key is
    the code point of one character of the result.

    :param webpage: text of the downloaded page.
    :return: the decoded characters joined into a single string.

    Every lookup goes through ``self._search_regex`` without a default, so
    a missing packed script, integer list or key is reported by that helper.
    """
    packed = self._search_regex(
        r'(eval\(function.+)', webpage, 'packed code')
    unpacked = decode_packed_codes(packed)

    digits = self._search_regex(
        r'\[((?:\d+,?)+)\]', unpacked, 'digits')
    key_digit = self._search_regex(
        r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
    # Convert the key once instead of once per decoded character.
    key = int(key_digit)

    # The digits pattern tolerates a trailing comma ("1,2,"), which leaves
    # an empty fragment after the split; skip it rather than fail in int().
    return ''.join(
        compat_chr(int(digit) - key)
        for digit in digits.split(',') if digit)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
'https://vshare.io/d/%s' % video_id, video_id) 'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)
title = self._html_search_regex( title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title') title = title.split(' - ')[0]
video_url = self._search_regex(
r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
webpage, 'video url', group='url')
unpacked = self._extract_packed(webpage)
video_urls = re.findall(r'<source src="([^"]+)', unpacked)
formats = [{'url': video_url} for video_url in video_urls]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'url': video_url, 'formats': formats,
} }
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
webpage)