| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2016-07-03 14:12:02 +08:00
										 |  |  | from .facebook import FacebookIE | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class BuzzFeedIE(InfoExtractor): | 
					
						
							|  |  |  |     _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)' | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |         'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss', | 
					
						
							|  |  |  |             'title': 'This Angry Ram Destroys A Punching Bag Like A Boss', | 
					
						
							|  |  |  |             'description': 'Rambro!', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist': [{ | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': 'aVCR29aE_OQ', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-07-03 14:12:02 +08:00
										 |  |  |                 'title': 'Angry Ram destroys a punching bag..', | 
					
						
							|  |  |  |                 'description': 'md5:c59533190ef23fd4458a5e8c8c872345', | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |                 'upload_date': '20141024', | 
					
						
							|  |  |  |                 'uploader_id': 'Buddhanz1', | 
					
						
							| 
									
										
										
										
											2016-07-03 14:12:02 +08:00
										 |  |  |                 'uploader': 'Angry Ram', | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |         }] | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', | 
					
						
							|  |  |  |         'params': { | 
					
						
							|  |  |  |             'skip_download': True,  # Got enough YouTube download tests | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-02-18 00:41:45 +01:00
										 |  |  |             'id': 'look-at-this-cute-dog-omg', | 
					
						
							| 
									
										
										
										
											2015-01-08 17:05:46 +01:00
										 |  |  |             'description': 're:Munchkin the Teddy Bear is back ?!', | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist': [{ | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': 'mVmBL8B-In0', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2016-07-03 14:12:02 +08:00
										 |  |  |                 'title': 're:Munchkin the Teddy Bear gets her exercise', | 
					
						
							|  |  |  |                 'description': 'md5:28faab95cda6e361bcff06ec12fc21d8', | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |                 'upload_date': '20141124', | 
					
						
							|  |  |  |                 'uploader_id': 'CindysMunchkin', | 
					
						
							| 
									
										
										
										
											2015-02-18 00:41:45 +01:00
										 |  |  |                 'uploader': 're:^Munchkin the', | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |             }, | 
					
						
							|  |  |  |         }] | 
					
						
							| 
									
										
										
										
											2016-07-03 14:12:02 +08:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': 'the-most-adorable-crash-landing-ever', | 
					
						
							|  |  |  |             'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing', | 
					
						
							|  |  |  |             'description': 'This gosling knows how to stick a landing.', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         'playlist': [{ | 
					
						
							|  |  |  |             'md5': '763ca415512f91ca62e4621086900a23', | 
					
						
							|  |  |  |             'info_dict': { | 
					
						
							|  |  |  |                 'id': '971793786185728', | 
					
						
							|  |  |  |                 'ext': 'mp4', | 
					
						
							|  |  |  |                 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...', | 
					
						
							|  |  |  |                 'uploader': 'Calgary Outdoor Centre-University of Calgary', | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         }], | 
					
						
							|  |  |  |         'add_ie': ['Facebook'], | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         playlist_id = self._match_id(url) | 
					
						
							|  |  |  |         webpage = self._download_webpage(url, playlist_id) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         all_buckets = re.findall( | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |             r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'', | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |             webpage) | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |         entries = [] | 
					
						
							|  |  |  |         for bd_json in all_buckets: | 
					
						
							|  |  |  |             bd = json.loads(bd_json) | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |             video = bd.get('video') or bd.get('progload_video') | 
					
						
							|  |  |  |             if not video: | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2014-11-27 15:44:35 +01:00
										 |  |  |             entries.append(self.url_result(video['url'])) | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-03 14:12:02 +08:00
										 |  |  |         facebook_url = FacebookIE._extract_url(webpage) | 
					
						
							|  |  |  |         if facebook_url: | 
					
						
							|  |  |  |             entries.append(self.url_result(facebook_url)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-26 22:52:52 +01:00
										 |  |  |         return { | 
					
						
							|  |  |  |             '_type': 'playlist', | 
					
						
							|  |  |  |             'id': playlist_id, | 
					
						
							|  |  |  |             'title': self._og_search_title(webpage), | 
					
						
							|  |  |  |             'description': self._og_search_description(webpage), | 
					
						
							|  |  |  |             'entries': entries, | 
					
						
							|  |  |  |         } |