update new
This commit is contained in:
commit
38e70fb328
@ -26,7 +26,6 @@ from youtube_dl.extractor import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
DemocracynowIE,
|
||||
)
|
||||
|
||||
@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||
|
||||
|
||||
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
|
||||
IE = FunnyOrDieIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||
|
||||
|
||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||
IE = DemocracynowIE
|
||||
|
@ -92,6 +92,7 @@ from .utils import (
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
YoutubeDLRedirectHandler,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||
@ -2343,6 +2344,7 @@ class YoutubeDL(object):
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
redirect_handler = YoutubeDLRedirectHandler()
|
||||
data_handler = compat_urllib_request_DataHandler()
|
||||
|
||||
# When passing our own FileHandler instance, build_opener won't add the
|
||||
@ -2356,7 +2358,7 @@ class YoutubeDL(object):
|
||||
file_handler.file_open = file_open
|
||||
|
||||
opener = compat_urllib_request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||
video_id = self._match_id(urlh.geturl())
|
||||
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
@ -2287,7 +2287,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
if head_response is not False:
|
||||
# Check for redirect
|
||||
new_url = compat_str(head_response.geturl())
|
||||
new_url = head_response.geturl()
|
||||
if url != new_url:
|
||||
self.report_following_redirect(new_url)
|
||||
if force_videoid:
|
||||
@ -2387,12 +2387,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=compat_str(full_response.geturl())),
|
||||
xspf_base_url=full_response.geturl()),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc,
|
||||
mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
@ -36,7 +35,7 @@ class LecturioBaseIE(InfoExtractor):
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(url_handle):
|
||||
return self._LOGIN_URL not in compat_str(url_handle.geturl())
|
||||
return self._LOGIN_URL not in url_handle.geturl()
|
||||
|
||||
# Already logged in
|
||||
if is_logged(urlh):
|
||||
|
@ -8,7 +8,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -99,7 +98,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'sso': 'true',
|
||||
})
|
||||
|
||||
login_state_url = compat_str(urlh.geturl())
|
||||
login_state_url = urlh.geturl()
|
||||
|
||||
try:
|
||||
login_page = self._download_webpage(
|
||||
@ -129,7 +128,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
})
|
||||
|
||||
access_token = self._search_regex(
|
||||
r'access_token=([^=&]+)', compat_str(urlh.geturl()),
|
||||
r'access_token=([^=&]+)', urlh.geturl(),
|
||||
'access token')
|
||||
|
||||
self._download_webpage(
|
||||
|
@ -6,7 +6,6 @@ import re
|
||||
from .theplatform import ThePlatformBaseIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -114,7 +113,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
continue
|
||||
urlh = ie._request_webpage(
|
||||
embed_url, video_id, note='Following embed URL redirect')
|
||||
embed_url = compat_str(urlh.geturl())
|
||||
embed_url = urlh.geturl()
|
||||
program_guid = _program_guid(_qs(embed_url))
|
||||
if program_guid:
|
||||
entries.append(embed_url)
|
||||
|
@ -129,7 +129,7 @@ class MediasiteIE(InfoExtractor):
|
||||
query = mobj.group('query')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
redirect_url = urlh.geturl()
|
||||
|
||||
# XXX: might have also extracted UrlReferrer and QueryString from the html
|
||||
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
|
||||
|
@ -46,7 +46,7 @@ class PlatziBaseIE(InfoExtractor):
|
||||
headers={'Referer': self._LOGIN_URL})
|
||||
|
||||
# login succeeded
|
||||
if 'platzi.com/login' not in compat_str(urlh.geturl()):
|
||||
if 'platzi.com/login' not in urlh.geturl():
|
||||
return
|
||||
|
||||
login_error = self._webpage_read_content(
|
||||
|
@ -8,7 +8,6 @@ from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -39,13 +38,13 @@ class SafariBaseIE(InfoExtractor):
|
||||
'Downloading login page')
|
||||
|
||||
def is_logged(urlh):
|
||||
return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
|
||||
return 'learning.oreilly.com/home/' in urlh.geturl()
|
||||
|
||||
if is_logged(urlh):
|
||||
self.LOGGED_IN = True
|
||||
return
|
||||
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
redirect_url = urlh.geturl()
|
||||
parsed_url = compat_urlparse.urlparse(redirect_url)
|
||||
qs = compat_parse_qs(parsed_url.query)
|
||||
next_uri = compat_urlparse.urljoin(
|
||||
|
@ -4,7 +4,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .wistia import WistiaIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
@ -58,7 +57,7 @@ class TeachableBaseIE(InfoExtractor):
|
||||
self._logged_in = True
|
||||
return
|
||||
|
||||
login_url = compat_str(urlh.geturl())
|
||||
login_url = urlh.geturl()
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1876350223',
|
||||
'title': 'Bacalao con kokotxas al pil-pil',
|
||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
||||
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
@ -55,6 +56,26 @@ class TelecincoIE(InfoExtractor):
|
||||
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
|
||||
'duration': 50,
|
||||
},
|
||||
}, {
|
||||
# video in opening's content
|
||||
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
|
||||
'info_dict': {
|
||||
'id': '2907195140',
|
||||
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
|
||||
'description': 'md5:73f340a7320143d37ab895375b2bf13a',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'info_dict': {
|
||||
'id': 'TpI2EttSDAReWpJ1o0NVh2',
|
||||
'ext': 'mp4',
|
||||
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
|
||||
'duration': 1015,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||
'only_matching': True,
|
||||
@ -135,16 +156,27 @@ class TelecincoIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
article = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
|
||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
|
||||
webpage, 'article'), display_id)['article']
|
||||
title = article.get('title')
|
||||
description = clean_html(article.get('leadParagraph'))
|
||||
description = clean_html(article.get('leadParagraph')) or ''
|
||||
if article.get('editorialType') != 'VID':
|
||||
entries = []
|
||||
for p in article.get('body', []):
|
||||
content = p.get('content')
|
||||
if p.get('type') != 'video' or not content:
|
||||
body = [article.get('opening')]
|
||||
body.extend(try_get(article, lambda x: x['body'], list) or [])
|
||||
for p in body:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
content = p.get('content')
|
||||
if not content:
|
||||
continue
|
||||
type_ = p.get('type')
|
||||
if type_ == 'paragraph':
|
||||
content_str = str_or_none(content)
|
||||
if content_str:
|
||||
description += content_str
|
||||
continue
|
||||
if type_ == 'video' and isinstance(content, dict):
|
||||
entries.append(self._parse_content(content, url))
|
||||
return self.playlist_result(
|
||||
entries, str_or_none(article.get('id')), title, description)
|
||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -151,7 +150,7 @@ class TumblrIE(InfoExtractor):
|
||||
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
redirect_url = urlh.geturl()
|
||||
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
|
||||
raise ExtractorError(
|
||||
'This Tumblr may contain sensitive media. '
|
||||
|
@ -591,7 +591,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
url, video_id, headers=headers)
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
redirect_url = urlh.geturl()
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
errmsg = ee.cause.read()
|
||||
|
@ -47,7 +47,7 @@ class XTubeIE(InfoExtractor):
|
||||
'display_id': 'A-Super-Run-Part-1-YT',
|
||||
'ext': 'flv',
|
||||
'title': 'A Super Run - Part 1 (YT)',
|
||||
'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
|
||||
'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
|
||||
'uploader': 'tshirtguy59',
|
||||
'duration': 579,
|
||||
'view_count': int,
|
||||
@ -87,6 +87,20 @@ class XTubeIE(InfoExtractor):
|
||||
'Cookie': 'age_verified=1; cookiesAccepted=1',
|
||||
})
|
||||
|
||||
title, thumbnail, duration = [None] * 3
|
||||
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
|
||||
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
||||
if config:
|
||||
config = config.get('mainRoll')
|
||||
if isinstance(config, dict):
|
||||
title = config.get('title')
|
||||
thumbnail = config.get('poster')
|
||||
duration = int_or_none(config.get('duration'))
|
||||
sources = config.get('sources')
|
||||
|
||||
if isinstance(sources, dict):
|
||||
sources = self._parse_json(self._search_regex(
|
||||
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
|
||||
webpage, 'sources', group='sources'), video_id,
|
||||
@ -102,20 +116,25 @@ class XTubeIE(InfoExtractor):
|
||||
self._remove_duplicate_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not title:
|
||||
title = self._search_regex(
|
||||
(r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
|
||||
webpage, 'title', group='title')
|
||||
description = self._search_regex(
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, default=None) or self._search_regex(
|
||||
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
|
||||
uploader = self._search_regex(
|
||||
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
|
||||
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
|
||||
webpage, 'uploader', fatal=False)
|
||||
if not duration:
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
|
||||
webpage, 'duration', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
|
||||
(r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
|
||||
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'>Comments? \(([\d,\.]+)\)<',
|
||||
@ -126,6 +145,7 @@ class XTubeIE(InfoExtractor):
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
@ -144,7 +164,7 @@ class XTubeUserIE(InfoExtractor):
|
||||
'id': 'greenshowers-4056496',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'playlist_mincount': 155,
|
||||
'playlist_mincount': 154,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
|
||||
|
||||
encodings = self._parse_json(
|
||||
self._search_regex(
|
||||
r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
|
||||
r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
|
||||
default='[]'),
|
||||
video_id, fatal=False)
|
||||
for encoding in encodings:
|
||||
|
@ -29,7 +29,6 @@ class ZapiksIE(InfoExtractor):
|
||||
'timestamp': 1359044972,
|
||||
'upload_date': '20130124',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -244,14 +244,14 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
'id': 'das-aktuelle-sportstudio',
|
||||
'title': 'das aktuelle sportstudio | ZDF',
|
||||
},
|
||||
'playlist_count': 21,
|
||||
'playlist_mincount': 23,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
||||
'info_dict': {
|
||||
'id': 'planet-e',
|
||||
'title': 'planet e.',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
||||
'only_matching': True,
|
||||
|
@ -134,7 +134,7 @@ def parseOpts(overrideArguments=None):
|
||||
action='help',
|
||||
help='Print this help text and exit')
|
||||
general.add_option(
|
||||
'-v', '--version',
|
||||
'--version',
|
||||
action='version',
|
||||
help='Print program version and exit')
|
||||
general.add_option(
|
||||
|
@ -2795,6 +2795,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
|
||||
https_response = http_response
|
||||
|
||||
|
||||
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
||||
if sys.version_info[0] < 3:
|
||||
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
||||
# On python 2 urlh.geturl() may sometimes return redirect URL
|
||||
# as byte string instead of unicode. This workaround allows
|
||||
# to force it always return unicode.
|
||||
return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
|
||||
|
||||
|
||||
def extract_timezone(date_str):
|
||||
m = re.search(
|
||||
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
|
Loading…
x
Reference in New Issue
Block a user