# coding: utf-8 from __future__ import unicode_literals import re import time from ..utils import ( ExtractorError, decode_packed_codes, get_element_by_class, get_element_by_id, int_or_none, float_or_none, parse_filesize, strip_or_none, ) from .common import InfoExtractor class Mp4UploadIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?mp4upload\.com/(?:embed-)?(?P[a-z\d]+)' _TESTS = [{ 'url': 'http://www.mp4upload.com/e52ycvdl4x29', 'md5': '09780a74b0de79ada5f9a8955f0704fc', 'info_dict': { 'id': 'e52ycvdl4x29', 'ext': 'mp4', 'title': '橋本潮 - ロマンティックあげるよ.mp4', 'timestamp': 1467471956, 'thumbnail': r're:^https?://.*\.jpg$', 'vcodec': 'ffh264', 'width': 454, 'height': 360, 'fps': 29.970, 'acodec': 'ffaac', 'asr': 44100, 'abr': 96, 'upload_date': '20160702', }, }, { 'url': 'https://www.mp4upload.com/embed-e52ycvdl4x29.html', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) page_url = 'https://www.mp4upload.com/%s' % video_id embed_url = 'https://www.mp4upload.com/embed-%s.html' % video_id webpage = self._download_webpage(page_url, video_id) if 'File not found' in webpage or 'File Not Found' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) title = strip_or_none(get_element_by_class('dfilename', webpage)) if not title: raise ExtractorError('Title not found', expected=True, video_id=video_id) info_dict = { 'title': title, 'id': video_id, } embedpage = self._download_webpage(embed_url, video_id, note='Downloading embed webpage') # It contains only `source url` and `thumbnail` poor_info_dict = self._extract_jwplayer_data( decode_packed_codes( get_element_by_id('player', embedpage) ).replace('\\\'', '"'), video_id, base_url=embed_url, require_title=False ) if not poor_info_dict: raise ExtractorError('I can\'t find player data', video_id=video_id) info_dict['thumbnail'] = poor_info_dict.get('thumbnail') _f = { 'url': poor_info_dict.get('formats', [{}])[0].get('url'), 'ext': poor_info_dict.get('formats', [{}])[0].get('ext'), 'format_id': '1', } file_info = re.findall( r'>(?P