YoutubeIE: fall back to automatic captions when subtitles aren't found (closes #843)
Also modify test_youtube_subtitles to support running the tests in any order.
This commit is contained in:
		
							parent
							
								
									57adeaea87
								
							
						
					
					
						commit
						dc1c355b72
					
				| @ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener) | |||||||
| class FakeDownloader(FileDownloader): | class FakeDownloader(FileDownloader): | ||||||
|     def __init__(self): |     def __init__(self): | ||||||
|         self.result = [] |         self.result = [] | ||||||
|         self.params = parameters |         # Different instances of the downloader can't share the same dictionary | ||||||
|  |         # some tests set the "sublang" parameter, which would break the md5 checks. | ||||||
|  |         self.params = dict(parameters) | ||||||
|     def to_screen(self, s): |     def to_screen(self, s): | ||||||
|         print(s) |         print(s) | ||||||
|     def trouble(self, s, tb=None): |     def trouble(self, s, tb=None): | ||||||
| @ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase): | |||||||
|         IE = YoutubeIE(DL) |         IE = YoutubeIE(DL) | ||||||
|         info_dict = IE.extract('QRS8MkLhQmM') |         info_dict = IE.extract('QRS8MkLhQmM') | ||||||
|         self.assertEqual(info_dict, None) |         self.assertEqual(info_dict, None) | ||||||
|  |     def test_youtube_automatic_captions(self): | ||||||
|  |         DL = FakeDownloader() | ||||||
|  |         DL.params['writesubtitles'] = True | ||||||
|  |         DL.params['subtitleslang'] = 'it' | ||||||
|  |         IE = YoutubeIE(DL) | ||||||
|  |         info_dict = IE.extract('8YoUxe5ncPo') | ||||||
|  |         sub = info_dict[0]['subtitles'][0] | ||||||
|  |         self.assertTrue(sub[2] is not None) | ||||||
| 
 | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|  | |||||||
| @ -376,6 +376,34 @@ class YoutubeIE(InfoExtractor): | |||||||
|             return (u'Did not fetch video subtitles', None, None) |             return (u'Did not fetch video subtitles', None, None) | ||||||
|         return (None, sub_lang, sub) |         return (None, sub_lang, sub) | ||||||
| 
 | 
 | ||||||
|  |     def _request_automatic_caption(self, video_id, webpage): | ||||||
|  |         """We need the webpage for getting the captions URL; pass it as an | ||||||
|  |            argument to speed up the process.""" | ||||||
|  |         sub_lang = self._downloader.params.get('subtitleslang') | ||||||
|  |         sub_format = self._downloader.params.get('subtitlesformat') | ||||||
|  |         self.to_screen(u'%s: Looking for automatic captions' % video_id) | ||||||
|  |         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||||
|  |         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang | ||||||
|  |         if mobj is None: | ||||||
|  |             return [(err_msg, None, None)] | ||||||
|  |         player_config = json.loads(mobj.group(1)) | ||||||
|  |         try: | ||||||
|  |             args = player_config[u'args'] | ||||||
|  |             caption_url = args[u'ttsurl'] | ||||||
|  |             timestamp = args[u'timestamp'] | ||||||
|  |             params = compat_urllib_parse.urlencode({ | ||||||
|  |                 'lang': 'en', | ||||||
|  |                 'tlang': sub_lang, | ||||||
|  |                 'fmt': sub_format, | ||||||
|  |                 'ts': timestamp, | ||||||
|  |                 'kind': 'asr', | ||||||
|  |             }) | ||||||
|  |             subtitles_url = caption_url + '&' + params | ||||||
|  |             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') | ||||||
|  |             return [(None, sub_lang, sub)] | ||||||
|  |         except KeyError: | ||||||
|  |             return [(err_msg, None, None)] | ||||||
|  | 
 | ||||||
|     def _extract_subtitle(self, video_id): |     def _extract_subtitle(self, video_id): | ||||||
|         """ |         """ | ||||||
|         Return a list with a tuple: |         Return a list with a tuple: | ||||||
| @ -623,7 +651,14 @@ class YoutubeIE(InfoExtractor): | |||||||
|             if video_subtitles: |             if video_subtitles: | ||||||
|                 (sub_error, sub_lang, sub) = video_subtitles[0] |                 (sub_error, sub_lang, sub) = video_subtitles[0] | ||||||
|                 if sub_error: |                 if sub_error: | ||||||
|                     self._downloader.report_error(sub_error) |                     # We try with the automatic captions | ||||||
|  |                     video_subtitles = self._request_automatic_caption(video_id, video_webpage) | ||||||
|  |                     (sub_error_auto, sub_lang, sub) = video_subtitles[0] | ||||||
|  |                     if sub is not None: | ||||||
|  |                         pass | ||||||
|  |                     else: | ||||||
|  |                         # We report the original error | ||||||
|  |                         self._downloader.report_error(sub_error) | ||||||
| 
 | 
 | ||||||
|         if self._downloader.params.get('allsubtitles', False): |         if self._downloader.params.get('allsubtitles', False): | ||||||
|             video_subtitles = self._extract_all_subtitles(video_id) |             video_subtitles = self._extract_all_subtitles(video_id) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user