[tistory] Add new extractor

This commit is contained in:
qsniyg 2016-12-16 09:16:06 -08:00
parent dc1f3a9f20
commit 491f42116c
2 changed files with 114 additions and 0 deletions

View File

@ -912,6 +912,7 @@ from .telewebion import TelewebionIE
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .tfo import TFOIE from .tfo import TFOIE
from .tistory import TistoryIE
from .theintercept import TheInterceptIE from .theintercept import TheInterceptIE
from .theplatform import ( from .theplatform import (
ThePlatformIE, ThePlatformIE,

View File

@ -0,0 +1,113 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
url_basename,
mimetype2ext,
HEADRequest,
ExtractorError
)
from ..compat import (
compat_urllib_request,
compat_urlparse,
compat_str
)
import os.path
import cgi
import re
class TistoryIE(InfoExtractor):
    """Extractor for files served from Tistory ``cfile*.uf.tistory.com`` hosts.

    A HEAD request decides how the URL is handled: when the server reports a
    Flash file, the SWF is downloaded, the attachment URL embedded in it is
    located and extraction restarts on that URL; any other content type is
    treated as the media file itself.
    """

    _VALID_URL = r'https?://cfile[0-9]*\.uf\.tistory\.com/(?:media|attach|attachment)/(?P<id>[A-Za-z0-9]*)'
    _TEST = {
        'url': 'http://cfile23.uf.tistory.com/media/111ED14A4FAEBC3C23AAE1',
        'md5': '55c32cda7b1a091d75c32aeaaea47595',
        'info_dict': {
            'id': '207B594C4FAEBBC118096B',
            # UTF-8 bytes of '함친.wmv-muxed'.  Decoding a bytes literal gives
            # a text string on both Python 2 and 3, whereas calling
            # compat_str(text, encoding=...) raises TypeError on Python 3.
            'title': b'\xed\x95\xa8\xec\xb9\x9c.wmv-muxed'.decode('utf-8'),
            'ext': 'mp4'
        }
    }

    _SWF_MIME_TYPE = 'application/x-shockwave-flash'

    def unquote(self, url):
        """Return ``url`` with percent-escapes decoded."""
        return compat_urlparse.unquote(url)

    def get_title(self, url, response):
        """Derive a title from the response headers, falling back to the URL.

        The ``filename`` parameter of the ``Content-Disposition`` header is
        preferred; without it the last path component of ``url`` is used.
        The name is percent-decoded and its extension removed.
        """
        _, params = cgi.parse_header(
            response.info().get('Content-Disposition', ''))
        filename = params.get('filename')
        if filename is None:
            filename = url_basename(url)
        title = os.path.splitext(self.unquote(filename))[0]
        if not isinstance(title, compat_str):
            # Byte string (possible on Python 2): convert to text.
            title = title.decode('UTF-8')
        return title

    def _decode_swf_body(self, data):
        """Return the (decompressed) SWF payload of ``data`` as text.

        Raises ExtractorError when ``data`` does not start with one of the
        three SWF signatures or when LZMA support is unavailable.
        """
        signature = data[:3]
        if signature == b'FWS':
            # Uncompressed: payload follows the 8-byte header.
            body = data[8:]
        elif signature == b'CWS':
            import zlib
            # decompressobj (rather than zlib.decompress) keeps the original
            # tolerance for truncated streams.
            body = zlib.decompressobj().decompress(data[8:])
        elif signature == b'ZWS':
            try:
                import pylzma
            except ImportError:
                raise ExtractorError(
                    'pylzma is required to read LZMA-compressed SWF files',
                    expected=True)
            # ZWS layout: 8-byte header, 4-byte compressed length, then the
            # 5-byte LZMA properties followed by the stream, so the data
            # pylzma expects starts at offset 12.
            body = pylzma.decompress(data[12:])
        else:
            raise ExtractorError("Not a SWF file")
        # latin-1 maps every byte to one code point, so this never fails and
        # yields text that str regexes can search on Python 2 and 3 alike.
        return body.decode('latin-1')

    def _find_real_url(self, swf_text):
        """Build the direct attachment URL referenced inside ``swf_text``.

        Raises ExtractorError when the expected URL pattern is missing.
        """
        check_match = re.search(
            r'(https?://[A-Za-z0-9.]*/attachment/cfile[0-9]*\.uf@[A-Za-z0-9.@%]*)',
            swf_text)
        if not check_match:
            raise ExtractorError("Unable to find check URL")
        real_match = re.search(
            r'(cfile[0-9]*\.uf)@([A-Z0-9]*)', check_match.group(1))
        if not real_match:
            raise ExtractorError("Unable to find real URL in check URL")
        host_prefix, file_id = real_match.groups()
        return "http://" + host_prefix + ".tistory.com/attach/" + file_id

    def _real_extract(self, url):
        """Return the info dict for ``url``, following one SWF indirection
        per call when the server answers with a Flash file."""
        video_id = self._match_id(url)
        # Use the extractor's own request helper (as the SWF download below
        # does) instead of calling urlopen directly.
        head = self._request_webpage(
            HEADRequest(url), video_id, 'Downloading headers')
        content_type = head.info().get("content-type")
        # Ignore parameters such as "; charset=..." and letter case when
        # checking for Flash content.
        mime_type = (content_type or '').split(';')[0].strip().lower()

        if mime_type != self._SWF_MIME_TYPE:
            return {
                "id": compat_str(video_id),
                "url": url,
                "title": self.get_title(url, head),
                "ext": mimetype2ext(content_type),
            }

        swf_handle = self._request_webpage(url, video_id, "Downloading SWF")
        swf_text = self._decode_swf_body(swf_handle.read())
        return self._real_extract(self._find_real_url(swf_text))