[generic] Add support for multiple brightcove URLs (Fixes #2283)
This commit is contained in:
		
							parent
							
								
									b0268cb6ce
								
							
						
					
					
						commit
						99877772d0
					
				| @ -34,6 +34,7 @@ from youtube_dl.extractor import ( | |||||||
|     KhanAcademyIE, |     KhanAcademyIE, | ||||||
|     EveryonesMixtapeIE, |     EveryonesMixtapeIE, | ||||||
|     RutubeChannelIE, |     RutubeChannelIE, | ||||||
|  |     GenericIE, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase): | |||||||
|         self.assertEqual(result['id'], '1409') |         self.assertEqual(result['id'], '1409') | ||||||
|         self.assertTrue(len(result['entries']) >= 34) |         self.assertTrue(len(result['entries']) >= 34) | ||||||
| 
 | 
 | ||||||
|  |     def test_multiple_brightcove_videos(self): | ||||||
|  |         # https://github.com/rg3/youtube-dl/issues/2283 | ||||||
|  |         dl = FakeYDL() | ||||||
|  |         ie = GenericIE(dl) | ||||||
|  |         result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html') | ||||||
|  |         self.assertIsPlaylist(result) | ||||||
|  |         self.assertEqual(result['id'], 'always-never-nuclear-command-and-control') | ||||||
|  |         self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') | ||||||
|  |         self.assertEqual(len(result['entries']), 3) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|  | |||||||
| @ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor): | |||||||
| 
 | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def _extract_brightcove_url(cls, webpage): |     def _extract_brightcove_url(cls, webpage): | ||||||
|         """Try to extract the brightcove url from the wepbage, returns None |         """Try to extract the brightcove url from the webpage, returns None | ||||||
|         if it can't be found |         if it can't be found | ||||||
|         """ |         """ | ||||||
|  |         urls = cls._extract_brightcove_urls(webpage) | ||||||
|  |         return urls[0] if urls else None | ||||||
|  | 
 | ||||||
|  |     @classmethod | ||||||
|  |     def _extract_brightcove_urls(cls, webpage): | ||||||
|  |         """Return a list of all Brightcove URLs from the webpage """ | ||||||
| 
 | 
 | ||||||
|         url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) |         url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) | ||||||
|         if url_m: |         if url_m: | ||||||
|             return url_m.group(1) |             return [url_m.group(1)] | ||||||
| 
 | 
 | ||||||
|         m_brightcove = re.search( |         matches = re.findall( | ||||||
|             r'''(?sx)<object |             r'''(?sx)<object | ||||||
|             (?: |             (?: | ||||||
|                 [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | |                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | | ||||||
|                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ |                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ | ||||||
|             ).+?</object>''', |             ).+?</object>''', | ||||||
|             webpage) |             webpage) | ||||||
|         if m_brightcove is not None: |         return [cls._build_brighcove_url(m) for m in matches] | ||||||
|             return cls._build_brighcove_url(m_brightcove.group()) |  | ||||||
|         else: |  | ||||||
|             return None |  | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         url, smuggled_data = unsmuggle_url(url, {}) |         url, smuggled_data = unsmuggle_url(url, {}) | ||||||
|  | |||||||
| @ -234,11 +234,21 @@ class GenericIE(InfoExtractor): | |||||||
|             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') |             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') | ||||||
| 
 | 
 | ||||||
|         # Look for BrightCove: |         # Look for BrightCove: | ||||||
|         bc_url = BrightcoveIE._extract_brightcove_url(webpage) |         bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) | ||||||
|         if bc_url is not None: |         if bc_urls: | ||||||
|             self.to_screen('Brightcove video detected.') |             self.to_screen('Brightcove video detected.') | ||||||
|             surl = smuggle_url(bc_url, {'Referer': url}) |             entries = [{ | ||||||
|             return self.url_result(surl, 'Brightcove') |                 '_type': 'url', | ||||||
|  |                 'url': smuggle_url(bc_url, {'Referer': url}), | ||||||
|  |                 'ie_key': 'Brightcove' | ||||||
|  |             } for bc_url in bc_urls] | ||||||
|  | 
 | ||||||
|  |             return { | ||||||
|  |                 '_type': 'playlist', | ||||||
|  |                 'title': video_title, | ||||||
|  |                 'id': video_id, | ||||||
|  |                 'entries': entries, | ||||||
|  |             } | ||||||
| 
 | 
 | ||||||
|         # Look for embedded (iframe) Vimeo player |         # Look for embedded (iframe) Vimeo player | ||||||
|         mobj = re.search( |         mobj = re.search( | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user