commit 38e70fb328

    update new
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -26,7 +26,6 @@ from youtube_dl.extractor import (
     ThePlatformIE,
     ThePlatformFeedIE,
     RTVEALaCartaIE,
-    FunnyOrDieIE,
     DemocracynowIE,
 )
 
@@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
-class TestFunnyOrDieSubtitles(BaseTestSubtitles):
-    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
-    IE = FunnyOrDieIE
-
-    def test_allsubtitles(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['allsubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(set(subtitles.keys()), set(['en']))
-        self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
-
-
 class TestDemocracynowSubtitles(BaseTestSubtitles):
     url = 'http://www.democracynow.org/shows/2015/7/3'
     IE = DemocracynowIE
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -92,6 +92,7 @@ from .utils import (
     YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
+    YoutubeDLRedirectHandler,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -2343,6 +2344,7 @@ class YoutubeDL(object):
         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+        redirect_handler = YoutubeDLRedirectHandler()
         data_handler = compat_urllib_request_DataHandler()
 
         # When passing our own FileHandler instance, build_opener won't add the
@@ -2356,7 +2358,7 @@ class YoutubeDL(object):
         file_handler.file_open = file_open
 
         opener = compat_urllib_request.build_opener(
-            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
+            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
 
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
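
For context, a minimal sketch (not part of the commit) of how the handler chain above composes. build_opener() recognizes subclasses of the stock handlers, so passing redirect_handler replaces the default redirect handling; Python 3 stdlib names are assumed here in place of the compat_urllib_request aliases:

import urllib.request


class LoggingRedirectHandler(urllib.request.HTTPRedirectHandler):
    # Same subclassing pattern as YoutubeDLRedirectHandler: intercept
    # redirect_request() before delegating to the stock implementation.
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        print('following redirect to %s' % newurl)
        return urllib.request.HTTPRedirectHandler.redirect_request(
            self, req, fp, code, msg, headers, newurl)


# build_opener() slots the custom handler in where the default
# HTTPRedirectHandler would otherwise go.
opener = urllib.request.build_opener(LoggingRedirectHandler())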
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     encode_base_n,
     ExtractorError,
@@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
 
         webpage, urlh = self._download_webpage_handle(url, display_id)
 
-        video_id = self._match_id(compat_str(urlh.geturl()))
+        video_id = self._match_id(urlh.geturl())
 
         hash = self._search_regex(
             r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2287,7 +2287,7 @@ class GenericIE(InfoExtractor):
 
         if head_response is not False:
             # Check for redirect
-            new_url = compat_str(head_response.geturl())
+            new_url = head_response.geturl()
             if url != new_url:
                 self.report_following_redirect(new_url)
                 if force_videoid:
@@ -2387,12 +2387,12 @@ class GenericIE(InfoExtractor):
                 return self.playlist_result(
                     self._parse_xspf(
                         doc, video_id, xspf_url=url,
-                        xspf_base_url=compat_str(full_response.geturl())),
+                        xspf_base_url=full_response.geturl()),
                     video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
                     doc,
-                    mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
+                    mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)
                 self._sort_formats(info_dict['formats'])
                 return info_dict
--- a/youtube_dl/extractor/lecturio.py
+++ b/youtube_dl/extractor/lecturio.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     determine_ext,
@@ -36,7 +35,7 @@ class LecturioBaseIE(InfoExtractor):
             self._LOGIN_URL, None, 'Downloading login popup')
 
         def is_logged(url_handle):
-            return self._LOGIN_URL not in compat_str(url_handle.geturl())
+            return self._LOGIN_URL not in url_handle.geturl()
 
         # Already logged in
         if is_logged(urlh):
--- a/youtube_dl/extractor/linuxacademy.py
+++ b/youtube_dl/extractor/linuxacademy.py
@@ -8,7 +8,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_b64decode,
     compat_HTTPError,
-    compat_str,
 )
 from ..utils import (
     ExtractorError,
@@ -99,7 +98,7 @@ class LinuxAcademyIE(InfoExtractor):
                 'sso': 'true',
             })
 
-        login_state_url = compat_str(urlh.geturl())
+        login_state_url = urlh.geturl()
 
         try:
             login_page = self._download_webpage(
@@ -129,7 +128,7 @@ class LinuxAcademyIE(InfoExtractor):
             })
 
         access_token = self._search_regex(
-            r'access_token=([^=&]+)', compat_str(urlh.geturl()),
+            r'access_token=([^=&]+)', urlh.geturl(),
             'access token')
 
         self._download_webpage(
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -6,7 +6,6 @@ import re
 from .theplatform import ThePlatformBaseIE
 from ..compat import (
     compat_parse_qs,
-    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
@@ -114,7 +113,7 @@ class MediasetIE(ThePlatformBaseIE):
                 continue
             urlh = ie._request_webpage(
                 embed_url, video_id, note='Following embed URL redirect')
-            embed_url = compat_str(urlh.geturl())
+            embed_url = urlh.geturl()
             program_guid = _program_guid(_qs(embed_url))
             if program_guid:
                 entries.append(embed_url)
--- a/youtube_dl/extractor/mediasite.py
+++ b/youtube_dl/extractor/mediasite.py
@@ -129,7 +129,7 @@ class MediasiteIE(InfoExtractor):
         query = mobj.group('query')
 
         webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?
-        redirect_url = compat_str(urlh.geturl())
+        redirect_url = urlh.geturl()
 
         # XXX: might have also extracted UrlReferrer and QueryString from the html
         service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
--- a/youtube_dl/extractor/platzi.py
+++ b/youtube_dl/extractor/platzi.py
@@ -46,7 +46,7 @@ class PlatziBaseIE(InfoExtractor):
             headers={'Referer': self._LOGIN_URL})
 
         # login succeeded
-        if 'platzi.com/login' not in compat_str(urlh.geturl()):
+        if 'platzi.com/login' not in urlh.geturl():
             return
 
         login_error = self._webpage_read_content(
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -8,7 +8,6 @@ from .common import InfoExtractor
 
 from ..compat import (
     compat_parse_qs,
-    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -39,13 +38,13 @@ class SafariBaseIE(InfoExtractor):
             'Downloading login page')
 
         def is_logged(urlh):
-            return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
+            return 'learning.oreilly.com/home/' in urlh.geturl()
 
         if is_logged(urlh):
             self.LOGGED_IN = True
             return
 
-        redirect_url = compat_str(urlh.geturl())
+        redirect_url = urlh.geturl()
         parsed_url = compat_urlparse.urlparse(redirect_url)
         qs = compat_parse_qs(parsed_url.query)
         next_uri = compat_urlparse.urljoin(
--- a/youtube_dl/extractor/teachable.py
+++ b/youtube_dl/extractor/teachable.py
@@ -4,7 +4,6 @@ import re
 
 from .common import InfoExtractor
 from .wistia import WistiaIE
-from ..compat import compat_str
 from ..utils import (
     clean_html,
     ExtractorError,
@@ -58,7 +57,7 @@ class TeachableBaseIE(InfoExtractor):
             self._logged_in = True
             return
 
-        login_url = compat_str(urlh.geturl())
+        login_url = urlh.geturl()
 
         login_form = self._hidden_inputs(login_page)
 
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -11,6 +11,7 @@ from ..utils import (
     determine_ext,
     int_or_none,
     str_or_none,
+    try_get,
     urljoin,
 )
 
@@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
         'info_dict': {
             'id': '1876350223',
             'title': 'Bacalao con kokotxas al pil-pil',
-            'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
+            'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
         },
         'playlist': [{
             'md5': 'adb28c37238b675dad0f042292f209a7',
@@ -55,6 +56,26 @@ class TelecincoIE(InfoExtractor):
             'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
             'duration': 50,
         },
+    }, {
+        # video in opening's content
+        'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
+        'info_dict': {
+            'id': '2907195140',
+            'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+            'description': 'md5:73f340a7320143d37ab895375b2bf13a',
+        },
+        'playlist': [{
+            'md5': 'adb28c37238b675dad0f042292f209a7',
+            'info_dict': {
+                'id': 'TpI2EttSDAReWpJ1o0NVh2',
+                'ext': 'mp4',
+                'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+                'duration': 1015,
+            },
+        }],
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
         'only_matching': True,
@@ -135,17 +156,28 @@ class TelecincoIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         article = self._parse_json(self._search_regex(
-            r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
+            r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
             webpage, 'article'), display_id)['article']
         title = article.get('title')
-        description = clean_html(article.get('leadParagraph'))
+        description = clean_html(article.get('leadParagraph')) or ''
         if article.get('editorialType') != 'VID':
             entries = []
-            for p in article.get('body', []):
-                content = p.get('content')
-                if p.get('type') != 'video' or not content:
+            body = [article.get('opening')]
+            body.extend(try_get(article, lambda x: x['body'], list) or [])
+            for p in body:
+                if not isinstance(p, dict):
                     continue
-                entries.append(self._parse_content(content, url))
+                content = p.get('content')
+                if not content:
+                    continue
+                type_ = p.get('type')
+                if type_ == 'paragraph':
+                    content_str = str_or_none(content)
+                    if content_str:
+                        description += content_str
+                    continue
+                if type_ == 'video' and isinstance(content, dict):
+                    entries.append(self._parse_content(content, url))
             return self.playlist_result(
                 entries, str_or_none(article.get('id')), title, description)
         content = article['opening']['content']
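
A standalone sketch (sample data invented) of the body walk the hunk above introduces: the article's opening block is scanned first, paragraph blocks are appended to the description, and video blocks become playlist entries:

article = {
    'opening': {'type': 'video', 'content': {'id': 'v1'}},
    'body': [
        {'type': 'paragraph', 'content': 'Some text. '},
        {'type': 'video', 'content': {'id': 'v2'}},
    ],
}

description, entries = '', []
body = [article.get('opening')]
body.extend(article.get('body') or [])
for p in body:
    if not isinstance(p, dict):
        continue
    content = p.get('content')
    if not content:
        continue
    if p.get('type') == 'paragraph':
        description += content
    elif p.get('type') == 'video' and isinstance(content, dict):
        entries.append(content['id'])

print(entries)      # ['v1', 'v2']
print(description)  # 'Some text. '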
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
@@ -151,7 +150,7 @@ class TumblrIE(InfoExtractor):
         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
         webpage, urlh = self._download_webpage_handle(url, video_id)
 
-        redirect_url = compat_str(urlh.geturl())
+        redirect_url = urlh.geturl()
         if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
             raise ExtractorError(
                 'This Tumblr may contain sensitive media. '
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -591,7 +591,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             # Retrieve video webpage to extract further information
             webpage, urlh = self._download_webpage_handle(
                 url, video_id, headers=headers)
-            redirect_url = compat_str(urlh.geturl())
+            redirect_url = urlh.geturl()
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                 errmsg = ee.cause.read()
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -47,7 +47,7 @@ class XTubeIE(InfoExtractor):
             'display_id': 'A-Super-Run-Part-1-YT',
             'ext': 'flv',
             'title': 'A Super Run - Part 1 (YT)',
-            'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
+            'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
             'uploader': 'tshirtguy59',
             'duration': 579,
             'view_count': int,
@@ -87,10 +87,24 @@ class XTubeIE(InfoExtractor):
             'Cookie': 'age_verified=1; cookiesAccepted=1',
         })
 
-        sources = self._parse_json(self._search_regex(
-            r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
-            webpage, 'sources', group='sources'), video_id,
-            transform_source=js_to_json)
+        title, thumbnail, duration = [None] * 3
+
+        config = self._parse_json(self._search_regex(
+            r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
+            default='{}'), video_id, transform_source=js_to_json, fatal=False)
+        if config:
+            config = config.get('mainRoll')
+            if isinstance(config, dict):
+                title = config.get('title')
+                thumbnail = config.get('poster')
+                duration = int_or_none(config.get('duration'))
+                sources = config.get('sources')
+
+        if not isinstance(sources, dict):
+            sources = self._parse_json(self._search_regex(
+                r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+                webpage, 'sources', group='sources'), video_id,
+                transform_source=js_to_json)
 
         formats = []
         for format_id, format_url in sources.items():
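
In plain terms, the hunk above switches to a two-stage lookup: prefer the structured playerConf JSON and fall back to the legacy "sources" regex scrape only when the config yields nothing usable (the `not` in the guard is restored above; without it the fallback would never fire). A hedged sketch with invented data:

import json

# Stand-in for the playerConf JSON scraped from the page.
page_config = '{"mainRoll": {"title": "A Super Run", "sources": {"mp4": "https://example.com/v.mp4"}}}'

sources = None
config = json.loads(page_config).get('mainRoll')
if isinstance(config, dict):
    sources = config.get('sources')
if not isinstance(sources, dict):
    sources = {}  # here the extractor would re-scrape "sources" from the page
print(sources)  # {'mp4': 'https://example.com/v.mp4'}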
@@ -102,20 +116,25 @@ class XTubeIE(InfoExtractor):
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
 
-        title = self._search_regex(
-            (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
-            webpage, 'title', group='title')
-        description = self._search_regex(
+        if not title:
+            title = self._search_regex(
+                (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+                webpage, 'title', group='title')
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
+            'twitter:description', webpage, default=None) or self._search_regex(
             r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
         uploader = self._search_regex(
             (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
              r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
             webpage, 'uploader', fatal=False)
-        duration = parse_duration(self._search_regex(
-            r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
-            webpage, 'duration', fatal=False))
+        if not duration:
+            duration = parse_duration(self._search_regex(
+                r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
+                webpage, 'duration', fatal=False))
         view_count = str_to_int(self._search_regex(
-            r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
+            (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
+             r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
             webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._html_search_regex(
             r'>Comments? \(([\d,\.]+)\)<',
@@ -126,6 +145,7 @@ class XTubeIE(InfoExtractor):
             'display_id': display_id,
             'title': title,
             'description': description,
+            'thumbnail': thumbnail,
             'uploader': uploader,
             'duration': duration,
             'view_count': view_count,
@@ -144,7 +164,7 @@ class XTubeUserIE(InfoExtractor):
             'id': 'greenshowers-4056496',
             'age_limit': 18,
         },
-        'playlist_mincount': 155,
+        'playlist_mincount': 154,
     }
 
     def _real_extract(self, url):
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
 
         encodings = self._parse_json(
             self._search_regex(
-                r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+                r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
                 default='[]'),
             video_id, fatal=False)
         for encoding in encodings:
--- a/youtube_dl/extractor/zapiks.py
+++ b/youtube_dl/extractor/zapiks.py
@@ -29,7 +29,6 @@ class ZapiksIE(InfoExtractor):
                 'timestamp': 1359044972,
                 'upload_date': '20130124',
                 'view_count': int,
-                'comment_count': int,
             },
         },
         {
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -244,14 +244,14 @@ class ZDFChannelIE(ZDFBaseIE):
             'id': 'das-aktuelle-sportstudio',
             'title': 'das aktuelle sportstudio | ZDF',
         },
-        'playlist_count': 21,
+        'playlist_mincount': 23,
     }, {
         'url': 'https://www.zdf.de/dokumentation/planet-e',
         'info_dict': {
             'id': 'planet-e',
             'title': 'planet e.',
         },
-        'playlist_count': 4,
+        'playlist_mincount': 50,
     }, {
         'url': 'https://www.zdf.de/filme/taunuskrimi/',
         'only_matching': True,
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -134,7 +134,7 @@ def parseOpts(overrideArguments=None):
         action='help',
         help='Print this help text and exit')
     general.add_option(
-        '-v', '--version',
+        '--version',
         action='version',
         help='Print program version and exit')
     general.add_option(
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2795,6 +2795,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     https_response = http_response
 
 
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+    if sys.version_info[0] < 3:
+        def redirect_request(self, req, fp, code, msg, headers, newurl):
+            # On python 2 urlh.geturl() may sometimes return redirect URL
+            # as byte string instead of unicode. This workaround allows
+            # to force it always return unicode.
+            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+
+
 def extract_timezone(date_str):
     m = re.search(
         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
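
A short illustration (values invented) of the Python 2 quirk the new handler works around: the redirect target can surface as a byte string, so it is coerced to text before being handed back to urllib. The decode below approximates the effect of compat_str(newurl) in the handler:

newurl = b'https://example.com/after-redirect'  # what Python 2 could hand over
if isinstance(newurl, bytes):
    newurl = newurl.decode('utf-8')  # compat_str(newurl), roughly
assert newurl == 'https://example.com/after-redirect'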