[gfycat] Add new extractor

alex 2015-04-15 18:13:16 -04:00
parent 9fc03aa87c
commit 1d0141565d
2 changed files with 105 additions and 0 deletions

youtube_dl/extractor/__init__.py

@ -185,6 +185,7 @@ from .gametrailers import GametrailersIE
from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE

youtube_dl/extractor/gfycat.py

@ -0,0 +1,104 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
)


class GfycatIE(InfoExtractor):
    _VALID_URL = r'https?://(?:\w+\.)?gfycat\.com/(?P<id>[a-zA-Z]+)(?:\.(?P<ext>gif|webm|mp4))?'
    _TESTS = [{
        'url': 'http://gfycat.com/RequiredUnkemptBuzzard',
        'info_dict': {
            'id': 'RequiredUnkemptBuzzard',
            'title': 'Headshot!',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://giant.gfycat.com/RequiredUnkemptBuzzard.gif',
        'info_dict': {
            'id': 'RequiredUnkemptBuzzard',
            'title': 'Headshot!',
            'ext': 'gif',
        },
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Extension explicitly requested in the URL (e.g. a direct .gif link), if any
        requested_ext = mobj.group('ext')

        webpage = self._download_webpage(
            'http://gfycat.com/' + video_id, video_id)

        width = int_or_none(self._search_regex(
            r'gfyWidth[\s=]*?"(\d+?)"', webpage, 'width', fatal=False))
        height = int_or_none(self._search_regex(
            r'gfyHeight[\s=]*?"(\d+?)"', webpage, 'height', fatal=False))
        frame_rate = int_or_none(self._search_regex(
            r'gfyFrameRate[\s=]*?"(\d+?)"', webpage, 'frame rate', fatal=False))
        frame_count = int_or_none(self._search_regex(
            r'gfyNumFrames[\s=]*?"(\d+?)"', webpage, 'frame count', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'gfyViews[\s=]*?"(\d+?)"', webpage, 'view count', fatal=False))
        title = self._search_regex(
            r'class="gfyTitle">([^<]*)', webpage, 'title', fatal=False)
        duration = (
            float(frame_count) / frame_rate
            if frame_count and frame_rate else None)

        formats = []
        for quality, fext in enumerate(('webm', 'gif', 'mp4')):
            furl = re.search(
                r'gfy%sUrl[\s=]*?"(.*?)"' % fext.title(), webpage)
            if not furl:
                # This rendition is not referenced on the page
                continue
            fsize = re.search(
                r'gfy%sSize[\s=]*?"(.*?)"' % fext.title(), webpage)
            filesize = int_or_none(fsize.group(1)) if fsize else None
            formats.append({
                'format_id': fext,
                'url': self._proto_relative_url(furl.group(1)),
                'ext': fext,
                'width': width,
                'height': height,
                'fps': frame_rate,
                'filesize': filesize,
                'vbr': filesize / duration / 1024 if filesize and duration else None,
                # gfycat clips carry no audio track
                'acodec': 'none',
                # Prefer the extension that was explicitly requested in the URL
                'preference': 1000 if fext == requested_ext else quality,
                'http_headers': {
                    'User-Agent': 'youtube-dl (like wget)',
                },
            })
        if not formats:
            raise ExtractorError(
                'No sources found for gfycat %s; make sure the URL points to '
                'the page embedding the clip' % video_id, expected=True)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
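
Not part of the commit, but a minimal sketch of how the new extractor can be exercised from a checkout that includes this change, by feeding a gfycat URL through the public YoutubeDL API and inspecting the extracted metadata; the bundled test cases can also be run the usual way with python test/test_download.py TestDownload.test_Gfycat.

# Illustrative only (not part of this commit): run the new extractor
# through the YoutubeDL API without downloading anything.
from youtube_dl import YoutubeDL

with YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info(
        'http://gfycat.com/RequiredUnkemptBuzzard', download=False)
    print(info['title'])
    print([f['format_id'] for f in info['formats']])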