Add basic (non-concatenating) support for playlists

This commit is contained in:
qsniyg 2016-12-16 23:47:35 -08:00
parent 8f2b90df53
commit 7829a5c2db
2 changed files with 165 additions and 78 deletions

View File

@ -912,7 +912,10 @@ from .telewebion import TelewebionIE
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .tfo import TFOIE from .tfo import TFOIE
from .tistory import TistoryIE from .tistory import (
TistoryIE,
TistoryPlaylistIE
)
from .theintercept import TheInterceptIE from .theintercept import TheInterceptIE
from .theplatform import ( from .theplatform import (
ThePlatformIE, ThePlatformIE,

View File

@ -10,6 +10,7 @@ from ..utils import (
) )
from ..compat import ( from ..compat import (
compat_urllib_request, compat_urllib_request,
compat_urllib_error,
compat_urlparse, compat_urlparse,
compat_str compat_str
) )
@ -17,72 +18,88 @@ from ..compat import (
import os.path import os.path
import cgi import cgi
import re import re
import xml.etree.ElementTree as ET
class TistoryIE(InfoExtractor): class TistoryBaseIE(InfoExtractor):
_VALID_URL = r'https?://cfile[0-9]*.uf.tistory.com/(?:media|attach|attachment|original)/(?P<id>[A-Za-z0-9]*)' _TI_MEDIA_URL = r'https?://cfile[0-9]*.uf.tistory.com/(?:media|attach|attachment|original)/(?P<id>[A-Za-z0-9]*)'
_TESTS = [ def _ti_unquote(self, url):
{
'url': 'http://cfile23.uf.tistory.com/media/111ED14A4FAEBC3C23AAE1',
'md5': '55c32cda7b1a091d75c32aeaaea47595',
'info_dict': {
'id': '207B594C4FAEBBC118096B',
'title': '함친.wmv-muxed',
'ext': 'mp4'
},
},
{
'url': 'http://cfile24.uf.tistory.com/original/1870B0374FBD97A80980D2',
'md5': 'dad089588a30447c0e51c78f29a9183e',
'info_dict': {
'id': '1870B0374FBD97A80980D2',
'title': '무제-1',
'ext': 'flv'
}
}
]
def unquote(self, url):
return compat_urlparse.unquote(url) return compat_urlparse.unquote(url)
def get_title(self, url, response): def _ti_get_title(self, url, response):
_, params = cgi.parse_header(response.info().get('Content-Disposition', '')) _, params = cgi.parse_header(response.info().get('Content-Disposition', ''))
if "filename" in params: if "filename" in params:
filename = params["filename"] filename = params["filename"]
else: else:
filename = url_basename(url) filename = url_basename(url)
retval = os.path.splitext(self.unquote(filename))[0] retval = os.path.splitext(self._ti_unquote(filename))[0]
if type(retval) != compat_str: if type(retval) != compat_str:
retval = retval.decode('UTF-8') retval = retval.decode('UTF-8')
return retval return retval
def get_ext(self, mime): def _ti_get_ext(self, mime):
ext = mimetype2ext(mime) ext = mimetype2ext(mime)
if ext == "x-shockwave-flash": if ext == "x-shockwave-flash":
ext = "flv" ext = "flv"
return ext return ext
def _real_extract(self, url): def _ti_get_real_from_check(self, check):
video_id = self._match_id(url) checkmatch = re.search("(cfile[0-9]*.uf)@([A-Z0-9]*)(?:\.([A-Za-z0-9]*))?", check)
if not checkmatch:
return None
cfile = checkmatch.group(1)
url = checkmatch.group(2)
ext = None
if len(checkmatch.groups()) > 2:
ext = checkmatch.group(3)
return ("http://" + cfile + ".tistory.com/attach/" + url, ext)
def _ti_get_video_id(self, url):
if '_TI_MEDIA_URL_RE' not in self.__dict__:
self._TI_MEDIA_URL_RE = re.compile(self._TI_MEDIA_URL)
m = self._TI_MEDIA_URL_RE.match(url)
assert m
return m.group('id')
def _ti_get_headers(self, url, video_id):
self.to_screen('%s: Downloading headers' % (video_id)) self.to_screen('%s: Downloading headers' % (video_id))
req = HEADRequest(url) req = HEADRequest(url)
head = compat_urllib_request.urlopen(req) return compat_urllib_request.urlopen(req)
def _ti_detect_swf(self, head):
content_type = head.info().get("content-type") content_type = head.info().get("content-type")
content_length = int(head.info().get("content-length")) content_length = int(head.info().get("content-length"))
ret = { if content_type == "application/x-shockwave-flash" and content_length < 200000:
return True
return False
def _ti_get_media(self, url, video_id, head, ext=None, title=None):
if head:
content_type = head.info().get("content-type")
ext = self._ti_get_ext(content_type)
title = self._ti_get_title(url, head)
if not title:
title = video_id
return {
"id": compat_str(video_id), "id": compat_str(video_id),
"url": url, "url": url,
"title": self.get_title(url, head) "title": title,
"ext": ext
} }
if content_type == "application/x-shockwave-flash" and content_length < 200000: def _ti_read_swf(self, url, video_id, head):
swfreq = self._request_webpage(url, video_id, "Downloading SWF") swfreq = self._request_webpage(url, video_id, "Downloading SWF")
data = swfreq.read() data = swfreq.read()
@ -118,15 +135,82 @@ class TistoryIE(InfoExtractor):
checkurl = match.group(1) checkurl = match.group(1)
checkmatch = re.search("(cfile[0-9]*.uf)@([A-Z0-9]*)", checkurl) real_url, ext = self._ti_get_real_from_check(checkurl)
if not checkmatch: if not real_url:
raise ExtractorError("Unable to find real URL in check URL") raise ExtractorError("Unable to find real URL in check URL")
cfile = checkmatch.group(1) return (real_url, ext)
url = checkmatch.group(2)
ret["url"] = "http://" + cfile + ".tistory.com/attach/" + url def _ti_dl(self, url, ext=None, title=None):
return self._real_extract(ret["url"]) video_id = self._ti_get_video_id(url)
head = None
try:
head = self._ti_get_headers(url, video_id)
except compat_urllib_error.HTTPError:
pass
except Exception:
head = None
if head and self._ti_detect_swf(head):
return self._ti_dl(*self._ti_read_swf(url, video_id, head))
else: else:
ret["ext"] = self.get_ext(content_type) return self._ti_get_media(url, video_id, head, ext, title)
return ret
class TistoryIE(TistoryBaseIE):
    """Extractor for a single URL matching the shared Tistory media pattern."""

    # Reuse the base class pattern so both stay in sync.
    _VALID_URL = TistoryBaseIE._TI_MEDIA_URL

    _TESTS = [{
        'url': 'http://cfile23.uf.tistory.com/media/111ED14A4FAEBC3C23AAE1',
        'md5': '55c32cda7b1a091d75c32aeaaea47595',
        'info_dict': {
            'id': '207B594C4FAEBBC118096B',
            'title': '함친.wmv-muxed',
            'ext': 'mp4',
        },
    }, {
        'url': 'http://cfile24.uf.tistory.com/original/1870B0374FBD97A80980D2',
        'md5': 'dad089588a30447c0e51c78f29a9183e',
        'info_dict': {
            'id': '1870B0374FBD97A80980D2',
            'title': '무제-1',
            'ext': 'flv',
        },
    }]

    def _real_extract(self, url):
        # All of the work is delegated to the shared base-class helper.
        info = self._ti_dl(url)
        return info
class TistoryPlaylistIE(TistoryBaseIE):
    """Extractor for Tistory XML playlists (entries are not concatenated).

    Accepts either the playlist ``.xml`` URL itself or a ``po.swf`` URL
    whose ``file=`` parameter holds the playlist XML URL.  Each
    ``location`` element found in the XML becomes one playlist entry.
    """

    # Literal dots and the query-string "?" are escaped so they are not
    # interpreted as regex metacharacters.
    _VALID_URL = r'(?:https?://cfs\.tistory\.com/custom/blog/.*/skin/images/po\.swf\?.*file=)?(?P<rurl>https?://cfs\.tistory\.com/custom/blog/.*/skin/images/(?P<id>.*)\.xml).*'

    def _real_extract(self, url):
        """Download the playlist XML and return a playlist result.

        Each ``location`` value is first passed through
        ``_ti_get_real_from_check``; if that helper does not recognise
        it, the raw value is used as the media URL.
        """
        # Match directly instead of relying on ``_match_id`` having set
        # ``_VALID_URL_RE`` as a side effect.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        rurl = mobj.group('rurl')

        xml = self._download_xml(rurl, video_id)

        entries = []
        for tracklist in xml:
            for track in tracklist:
                for tag in track:
                    if 'location' not in tag.tag:
                        continue
                    # ``text`` is None for an empty element.
                    loc = (tag.text or '').strip()
                    if not loc:
                        continue
                    ext = None
                    # The helper returns None (not a tuple) when the
                    # location is not in the "cfileN.uf@ID" form, so it
                    # must not be unpacked unconditionally.
                    resolved = self._ti_get_real_from_check(loc)
                    if resolved:
                        newloc, ext = resolved
                        if newloc:
                            loc = newloc
                    entries.append(self._ti_dl(loc, ext))
        return self.playlist_result(entries)