From 32a35e441874ad9daba10c29a6a33f13a4953fbb Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 17:35:01 +0100 Subject: [PATCH 01/55] Add support for http://www.extremetube.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/extremetube.py | 52 +++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/extremetube.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..5eed1eebd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -39,6 +39,7 @@ from .ehow import EHowIE from .eighttracks import EightTracksIE from .escapist import EscapistIE from .exfm import ExfmIE +from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fktv import ( diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py new file mode 100644 index 000000000..981de430d --- /dev/null +++ b/youtube_dl/extractor/extremetube.py @@ -0,0 +1,52 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class ExtremeTubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pextremetube\.com/video/.+?(?P[0-9]+))(?:[/?&]|$)' + _TEST = { + u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', + u'file': u'652431.mp4', + u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', + u'info_dict': { + u"title": u"Music Video 14 british euro brit european cumshots swallow", + u"uploader": u"unknown", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'

]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') + uploader = self._html_search_regex(r'>Posted by:(?=<)(\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': uploader, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } From 77ae65877e7b4b71d446ea928fd14f973826f07b Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 18:18:58 +0100 Subject: [PATCH 02/55] Add support for http://www.mofosex.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mofosex.py | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/mofosex.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..045d4447a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -81,6 +81,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE from .mixcloud import MixcloudIE +from .mofosex import MofosexIE from .mtv import MTVIE from .muzu import MuzuTVIE from .myspass import MySpassIE diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py new file mode 100644 index 000000000..a0c926cd1 --- /dev/null +++ b/youtube_dl/extractor/mofosex.py @@ -0,0 +1,49 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class MofosexIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' + _TEST = { + u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', + u'file': u'5018.mp4', + u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a', + u'info_dict': { + u"title": u"Japanese Teen Music Video", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'

(.+?)<', webpage, u'title') + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url')) + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } From dcc2a706ef7df65839aa40ce5fda61f8cea36645 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 19:23:48 +0100 Subject: [PATCH 03/55] Add support for http://www.xtube.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/xtube.py | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/xtube.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..7efd097e4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -149,6 +149,7 @@ from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE from .xvideos import XVideosIE +from .xtube import XTubeIE from .yahoo import YahooIE, YahooSearchIE from .youjizz import YouJizzIE from .youku import YoukuIE diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py new file mode 100644 index 000000000..7d06a7021 --- /dev/null +++ b/youtube_dl/extractor/xtube.py @@ -0,0 +1,54 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class XTubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' + _TEST = { + u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_', + u'file': u'kVTUy_G222_.mp4', + u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab', + u'info_dict': { + u"title": u"strange erotica", + u"uploader": u"greenshowers", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'
([^<]+)', webpage, u'description', default=None) + video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/') + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format[0] += 'p' + format[1] += 'k' + format = "-".join( format ) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': video_uploader, + 'description': video_description, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': 18, + } From ac2547f5ffc30a352207336194e7bbb0435d01a7 Mon Sep 17 00:00:00 2001 From: Alex Van't Hof Date: Thu, 31 Oct 2013 01:57:22 -0400 Subject: [PATCH 04/55] [teamcoco] Fix video url extraction for some videos Video url extraction failed for some videos, e.g. http://teamcoco.com/video/old-time-baseball The url extracted was also occasionally suboptimal quality, e.g. http://teamcoco.com/video/louis-ck-interview-george-w-bush --- youtube_dl/extractor/teamcoco.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index c910110ca..76246c7cc 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -3,6 +3,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + RegexNotFoundError, ) @@ -11,7 +12,7 @@ class TeamcocoIE(InfoExtractor): _TEST = { u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush', u'file': u'19705.mp4', - u'md5': u'27b6f7527da5acf534b15f21b032656e', + u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a', u'info_dict': { u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", u"title": u"Louis C.K. Interview Pt. 1 11/3/11" @@ -33,8 +34,21 @@ class TeamcocoIE(InfoExtractor): data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data = self._download_webpage(data_url, video_id, 'Downloading data webpage') - video_url = self._html_search_regex(r']*type="high".*?>(.*?)', - data, u'video URL') + + qualities = [ '1080p', '720p', '1000k', '480p', '500k' ] + best_quality_idx = len(qualities)+1 # First regex match may not be optimal + for idx, quality in enumerate(qualities): + regex = r']*type="(?:high|standard)".*?>(.*%s.*)' % quality + try: + url = self._html_search_regex(regex, data, u'video URL') + if idx < best_quality_idx: + video_url = url + best_quality_idx = idx + except RegexNotFoundError: + # Just catch fatal exc. Don't want the fatal=False warning + continue + if not video_url: + raise RegexNotFoundError(u'Unable to extract video URL') return [{ 'id': video_id, From ab4e15134719e6c01a3a9768f21a0f361e4b781d Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 1 Nov 2013 01:24:23 +0100 Subject: [PATCH 05/55] [CinemassacreIE] Support more embed urls --- youtube_dl/extractor/cinemassacre.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 2fe1033f0..8f9396d6b 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor): webpage_url = u'http://' + mobj.group('url') webpage = self._download_webpage(webpage_url, None) # Don't know video id yet video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') - mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) + mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) if not mobj: raise ExtractorError(u'Can\'t extract embed url and video id') playerdata_url = mobj.group(u'embed_url') From 31366066bd18cfd32de901264f53f42fe96f55c2 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 2 Nov 2013 18:08:16 +0100 Subject: [PATCH 06/55] Add support for live parameter to rtmpdump --- youtube_dl/FileDownloader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 8ecabab1a..0804dfbe1 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -267,7 +267,7 @@ class FileDownloader(object): self.to_screen(u'\r%s[download] 100%% of %s in %s' % (clear_line, data_len_str, self.format_seconds(tot_time))) - def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): + def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): self.report_destination(filename) tmpfilename = self.temp_name(filename) test = self.params.get('test', False) @@ -294,6 +294,8 @@ class FileDownloader(object): basic_args += ['--tcUrl', url] if test: basic_args += ['--stop', '1'] + if live: + basic_args += ['--live'] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: @@ -411,7 +413,8 @@ class FileDownloader(object): info_dict.get('player_url', None), info_dict.get('page_url', None), info_dict.get('play_path', None), - info_dict.get('tc_url', None)) + info_dict.get('tc_url', None), + info_dict.get('live', False)) # Attempt to download using mplayer if url.startswith('mms') or url.startswith('rtsp'): From 0a43ddf3209e13f5e87b07c440e03a45deea3e57 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 2 Nov 2013 18:08:35 +0100 Subject: [PATCH 07/55] [CinemassacreIE] Add live paramter to extracted info as a workaround --- youtube_dl/extractor/cinemassacre.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 2fe1033f0..79d879ced 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -65,6 +65,7 @@ class CinemassacreIE(InfoExtractor): { 'url': url, 'play_path': 'mp4:' + sd_file, + 'live': True, # workaround 'ext': 'flv', 'format': 'sd', 'format_id': 'sd', @@ -72,6 +73,7 @@ class CinemassacreIE(InfoExtractor): { 'url': url, 'play_path': 'mp4:' + hd_file, + 'live': True, # workaround 'ext': 'flv', 'format': 'hd', 'format_id': 'hd', From 1f343eaabbb9e0daf67363b7737833cf5e2a3e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Mej=C3=ADa?= Date: Sat, 2 Nov 2013 18:01:05 +0100 Subject: [PATCH 08/55] [subtitles] refactor to support websites with subtitle information the webpage. I added the parameter webpage, so now it's similar to the way automatic captions are handled. This is an improvement needed for websites like TED. --- youtube_dl/extractor/dailymotion.py | 6 +++--- youtube_dl/extractor/subtitles.py | 12 ++++++------ youtube_dl/extractor/youtube.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 7d8353946..3aef82bcf 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -113,9 +113,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): video_url = info[max_quality] # subtitles - video_subtitles = self.extract_subtitles(video_id) + video_subtitles = self.extract_subtitles(video_id, webpage) if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id) + self._list_available_subtitles(video_id, webpage) return return [{ @@ -129,7 +129,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): 'thumbnail': info['thumbnail_url'] }] - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 90de7de3a..4b4c5235d 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return any([self._downloader.params.get('writesubtitles', False), self._downloader.params.get('writeautomaticsub')]) - def _list_available_subtitles(self, video_id, webpage=None): + def _list_available_subtitles(self, video_id, webpage): """ outputs the available subtitles for the video """ - sub_lang_list = self._get_available_subtitles(video_id) + sub_lang_list = self._get_available_subtitles(video_id, webpage) auto_captions_list = self._get_available_automatic_caption(video_id, webpage) sub_lang = ",".join(list(sub_lang_list.keys())) self.to_screen(u'%s: Available subtitles for video: %s' % @@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor): self.to_screen(u'%s: Available automatic captions for video: %s' % (video_id, auto_lang)) - def extract_subtitles(self, video_id, video_webpage=None): + def extract_subtitles(self, video_id, webpage): """ returns {sub_lang: sub} ,{} if subtitles not found or None if the subtitles aren't requested. @@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return None available_subs_list = {} if self._downloader.params.get('writeautomaticsub', False): - available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) + available_subs_list.update(self._get_available_automatic_caption(video_id, webpage)) if self._downloader.params.get('writesubtitles', False): - available_subs_list.update(self._get_available_subtitles(video_id)) + available_subs_list.update(self._get_available_subtitles(video_id, webpage)) if not available_subs_list: # error, it didn't get the available subtitles return {} @@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor): return return sub - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): """ returns {sub_lang: url} or {} if not available Must be redefined by the subclasses diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4347651d7..d7c9b38f9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1099,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, From a9a3876d55be943a7eaf505cbeb8fb862514db6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Mej=C3=ADa?= Date: Sat, 2 Nov 2013 19:48:39 +0100 Subject: [PATCH 09/55] [ted] Added support for subtitle download --- test/test_ted_subtitles.py | 63 +++++++++++++++++++++++++++++++++++++ youtube_dl/extractor/ted.py | 28 ++++++++++++++--- 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 test/test_ted_subtitles.py diff --git a/test/test_ted_subtitles.py b/test/test_ted_subtitles.py new file mode 100644 index 000000000..3283253ab --- /dev/null +++ b/test/test_ted_subtitles.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +import sys +import unittest +import hashlib + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.extractor import TEDIE +from youtube_dl.utils import * +from helper import FakeYDL + +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() + +class TestTedSubtitles(unittest.TestCase): + def setUp(self): + self.DL = FakeYDL() + self.url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' + def getInfoDict(self): + IE = TEDIE(self.DL) + info_dict = IE.extract(self.url) + return info_dict + def getSubtitles(self): + info_dict = self.getInfoDict() + return info_dict[0]['subtitles'] + def test_no_writesubtitles(self): + subtitles = self.getSubtitles() + self.assertEqual(subtitles, None) + def test_subtitles(self): + self.DL.params['writesubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d') + def test_subtitles_lang(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitleslangs'] = ['fr'] + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6') + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 28) + def test_list_subtitles(self): + self.DL.params['listsubtitles'] = True + info_dict = self.getInfoDict() + self.assertEqual(info_dict, [None]) + def test_automatic_captions(self): + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslang'] = ['en'] + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) == 0) + def test_multiple_langs(self): + self.DL.params['writesubtitles'] = True + langs = ['es', 'fr', 'de'] + self.DL.params['subtitleslangs'] = langs + subtitles = self.getSubtitles() + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index dfa1176a3..239e2a448 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -1,10 +1,9 @@ import json import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor - -class TEDIE(InfoExtractor): +class TEDIE(SubtitlesInfoExtractor): _VALID_URL=r'''http://www\.ted\.com/ ( ((?Pplaylists)/(?P\d+)) # We have a playlist @@ -82,11 +81,21 @@ class TEDIE(InfoExtractor): 'url': stream['file'], 'format': stream['id'] } for stream in info['htmlStreams']] + + video_id = info['id'] + + # subtitles + video_subtitles = self.extract_subtitles(video_id, webpage) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, webpage) + return + info = { - 'id': info['id'], + 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'description': desc, + 'subtitles': video_subtitles, 'formats': formats, } @@ -94,3 +103,14 @@ class TEDIE(InfoExtractor): info.update(info['formats'][-1]) return info + + def _get_available_subtitles(self, video_id, webpage): + options = self._search_regex(r'(?:)', webpage, 'subtitles_language_select', flags=re.DOTALL) + languages = re.findall(r'(?: