[tistory] Add new extractor

This commit is contained in:
qsniyg 2016-12-16 09:16:06 -08:00
parent dc1f3a9f20
commit 491f42116c
2 changed files with 114 additions and 0 deletions

View File

@ -912,6 +912,7 @@ from .telewebion import TelewebionIE
from .testurl import TestURLIE
from .tf1 import TF1IE
from .tfo import TFOIE
from .tistory import TistoryIE
from .theintercept import TheInterceptIE
from .theplatform import (
ThePlatformIE,

View File

@ -0,0 +1,113 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
url_basename,
mimetype2ext,
HEADRequest,
ExtractorError
)
from ..compat import (
compat_urllib_request,
compat_urlparse,
compat_str
)
import os.path
import cgi
import re
class TistoryIE(InfoExtractor):
    """Extractor for files served from ``cfileNN.uf.tistory.com`` hosts.

    A HEAD request decides how the URL is handled: if the server reports
    ``application/x-shockwave-flash`` the SWF is downloaded, the attachment
    URL embedded in it is located, and extraction restarts on the rebuilt
    ``/attach/`` URL.  Any other content type is treated as the media file
    itself.
    """

    # Dots are escaped so that they only match a literal '.' in the host name.
    _VALID_URL = r'https?://cfile[0-9]*\.uf\.tistory\.com/(?:media|attach|attachment)/(?P<id>[A-Za-z0-9]*)'
    _TEST = {
        'url': 'http://cfile23.uf.tistory.com/media/111ED14A4FAEBC3C23AAE1',
        'md5': '55c32cda7b1a091d75c32aeaaea47595',
        'info_dict': {
            'id': '207B594C4FAEBBC118096B',
            # A unicode literal works on both Python 2 and 3;
            # compat_str(text, encoding=...) raises TypeError on Python 3.
            'title': u'함친.wmv-muxed',
            'ext': 'mp4',
        },
    }

    def unquote(self, url):
        """Return ``url`` with its percent-escapes decoded."""
        return compat_urlparse.unquote(url)

    def get_title(self, url, response):
        """Derive a title from the response headers or, failing that, the URL.

        The ``filename`` parameter of the ``Content-Disposition`` header is
        preferred; when it is missing or empty the last path component of
        ``url`` is used instead.  The name is percent-decoded and its file
        extension is removed.

        :param url: URL the response was fetched from.
        :param response: response object exposing ``info()`` for its headers.
        :return: the title as a text (``compat_str``) string.
        """
        _, params = cgi.parse_header(
            response.info().get('Content-Disposition', ''))
        filename = params.get('filename') or url_basename(url)
        title = os.path.splitext(self.unquote(filename))[0]
        if not isinstance(title, compat_str):
            # Byte string (Python 2): convert to text.
            title = title.decode('UTF-8')
        return title

    def _swf_body(self, data):
        """Return the (decompressed) body of the SWF file ``data`` as bytes.

        :raises ExtractorError: if ``data`` does not start with a SWF
            signature, or if LZMA support is needed but unavailable.
        """
        # Slicing (rather than indexing) copes with short input and yields
        # the same type on Python 2 and Python 3.
        signature = data[:3]
        if signature == b'FWS':
            # Uncompressed: the body follows the 8 header bytes as-is.
            return data[8:]
        if signature == b'CWS':
            import zlib
            return zlib.decompressobj().decompress(data[8:])
        if signature == b'ZWS':
            try:
                import pylzma
            except ImportError:
                raise ExtractorError(
                    'pylzma is required to read LZMA-compressed SWF files',
                    expected=True)
            # NOTE(review): offset 11 is kept from the original code; the
            # LZMA properties may actually start at byte 12 - confirm
            # against the SWF specification.
            return pylzma.decompress(data[11:])
        raise ExtractorError("Not a SWF file")

    def _real_url_from_swf(self, swfdata):
        """Build the direct ``/attach/`` URL referenced inside ``swfdata``.

        :param swfdata: decompressed SWF body (bytes).
        :raises ExtractorError: if no attachment URL can be found.
        """
        # Byte patterns are used because swfdata is binary on Python 3.
        match = re.search(
            br'(https?://[A-Za-z0-9.]*/attachment/cfile[0-9]*\.uf@[A-Za-z0-9.@%]*)',
            swfdata)
        if not match:
            raise ExtractorError("Unable to find check URL")
        checkmatch = re.search(
            br'(cfile[0-9]*\.uf)@([A-Z0-9]+)', match.group(1))
        if not checkmatch:
            raise ExtractorError("Unable to find real URL in check URL")
        # Both groups are pure ASCII by construction of the pattern.
        cfile = checkmatch.group(1).decode('ascii')
        file_id = checkmatch.group(2).decode('ascii')
        return "http://" + cfile + ".tistory.com/attach/" + file_id

    def _real_extract(self, url):
        """Return the info dict for ``url``, following SWF wrappers.

        :raises ExtractorError: if a SWF wrapper cannot be parsed.
        """
        video_id = self._match_id(url)
        # Go through the extractor's own request helper, as the SWF download
        # below does, instead of opening the URL directly.
        head = self._request_webpage(
            HEADRequest(url), video_id, 'Downloading headers')
        content_type = head.info().get("content-type")

        if content_type != "application/x-shockwave-flash":
            return {
                "id": compat_str(video_id),
                "url": url,
                "title": self.get_title(url, head),
                "ext": mimetype2ext(content_type),
            }

        swf = self._request_webpage(url, video_id, "Downloading SWF").read()
        real_url = self._real_url_from_swf(self._swf_body(swf))
        # The SWF only points at the real file; restart extraction there.
        return self._real_extract(real_url)