| 
									
										
										
										
											2014-02-10 20:48:46 +01:00
										 |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-23 22:59:51 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  | from .youtube import YoutubeIE | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  | from ..compat import compat_str | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  | from ..utils import int_or_none | 
					
						
							| 
									
										
										
										
											2013-06-23 22:59:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class BreakIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |     _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' | 
					
						
							| 
									
										
										
										
											2014-10-03 18:57:18 +07:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-02-10 20:48:46 +01:00
										 |  |  |         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '2468056', | 
					
						
							|  |  |  |             'ext': 'mp4', | 
					
						
							|  |  |  |             'title': 'When Girls Act Like D-Bags', | 
					
						
							| 
									
										
										
										
											2015-08-08 21:58:24 +06:00
										 |  |  |             'age_limit': 13, | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |         }, | 
					
						
							|  |  |  |     }, { | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         # youtube embed | 
					
						
							|  |  |  |         'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |             'id': 'RrrDLdeL2HQ', | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |             'ext': 'mp4', | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |             'title': 'Whale Watching Boat Crashing Into San Diego Dock', | 
					
						
							|  |  |  |             'description': 'md5:afc1b2772f0a8468be51dd80eb021069', | 
					
						
							|  |  |  |             'upload_date': '20160331', | 
					
						
							|  |  |  |             'uploader': 'Steve Holden', | 
					
						
							|  |  |  |             'uploader_id': 'sdholden07', | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         'params': { | 
					
						
							|  |  |  |             'skip_download': True, | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2014-10-03 18:57:18 +07:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.break.com/video/ugc/baby-flex-2773063', | 
					
						
							|  |  |  |         'only_matching': True, | 
					
						
							|  |  |  |     }] | 
					
						
							| 
									
										
										
										
											2013-06-23 22:59:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def _real_extract(self, url): | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         display_id, video_id = re.match(self._VALID_URL, url).groups() | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         webpage = self._download_webpage(url, display_id) | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         youtube_url = YoutubeIE._extract_url(webpage) | 
					
						
							|  |  |  |         if youtube_url: | 
					
						
							|  |  |  |             return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         content = self._parse_json( | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |             self._search_regex( | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |                 r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, | 
					
						
							|  |  |  |                 'content'), | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |             display_id) | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |         formats = [] | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         for video in content: | 
					
						
							|  |  |  |             video_url = video.get('url') | 
					
						
							|  |  |  |             if not video_url or not isinstance(video_url, compat_str): | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |             bitrate = int_or_none(self._search_regex( | 
					
						
							|  |  |  |                 r'(\d+)_kbps', video_url, 'tbr', default=None)) | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  |             formats.append({ | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |                 'url': video_url, | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |                 'format_id': 'http-%d' % bitrate if bitrate else 'http', | 
					
						
							|  |  |  |                 'tbr': bitrate, | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  |             }) | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         self._sort_formats(formats) | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         title = self._search_regex( | 
					
						
							|  |  |  |             (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', | 
					
						
							|  |  |  |              r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value') | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         def get(key, name): | 
					
						
							|  |  |  |             return int_or_none(self._search_regex( | 
					
						
							|  |  |  |                 r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, | 
					
						
							|  |  |  |                 default=None)) | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |         age_limit = get('ratings', 'age limit') | 
					
						
							|  |  |  |         video_id = video_id or get('pid', 'video id') or display_id | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-10 20:48:46 +01:00
										 |  |  |         return { | 
					
						
							|  |  |  |             'id': video_id, | 
					
						
							| 
									
										
										
										
											2017-01-06 11:25:48 +01:00
										 |  |  |             'display_id': display_id, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2018-04-23 00:16:52 +07:00
										 |  |  |             'thumbnail': self._og_search_thumbnail(webpage), | 
					
						
							|  |  |  |             'age_limit': age_limit, | 
					
						
							| 
									
										
										
										
											2014-10-03 19:37:47 +07:00
										 |  |  |             'formats': formats, | 
					
						
							| 
									
										
										
										
											2014-02-10 20:48:46 +01:00
										 |  |  |         } |