| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | # coding: utf-8 | 
					
						
							|  |  |  | from __future__ import unicode_literals | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .common import InfoExtractor | 
					
						
							| 
									
										
										
										
											2015-12-03 22:43:19 +01:00
										 |  |  | from ..compat import compat_str | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | from ..utils import ( | 
					
						
							|  |  |  |     int_or_none, | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  |     unescapeHTML, | 
					
						
							| 
									
										
										
										
											2015-04-21 02:32:10 +08:00
										 |  |  |     ExtractorError, | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |     xpath_text, | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class BiliBiliIE(InfoExtractor): | 
					
						
							| 
									
										
										
										
											2016-03-21 21:36:32 +06:00
										 |  |  |     _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  |     _TESTS = [{ | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  |         'url': 'http://www.bilibili.tv/video/av1074402/', | 
					
						
							|  |  |  |         'md5': '2c301e4dab317596e837c3e7633e7d86', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |             'id': '1554319', | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  |             'ext': 'flv', | 
					
						
							|  |  |  |             'title': '【金坷垃】金泡沫', | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |             'duration': 308313, | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  |             'upload_date': '20140420', | 
					
						
							|  |  |  |             'thumbnail': 're:^https?://.+\.jpg', | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', | 
					
						
							|  |  |  |             'timestamp': 1397983878, | 
					
						
							|  |  |  |             'uploader': '菊子桑', | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  |     }, { | 
					
						
							|  |  |  |         'url': 'http://www.bilibili.com/video/av1041170/', | 
					
						
							|  |  |  |         'info_dict': { | 
					
						
							|  |  |  |             'id': '1041170', | 
					
						
							|  |  |  |             'title': '【BD1080P】刀语【诸神&异域】', | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  |             'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', | 
					
						
							|  |  |  |             'uploader': '枫叶逝去', | 
					
						
							|  |  |  |             'timestamp': 1396501299, | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  |         }, | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  |         'playlist_count': 9, | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  |     }] | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  |     def _real_extract(self, url): | 
					
						
							|  |  |  |         mobj = re.match(self._VALID_URL, url) | 
					
						
							|  |  |  |         video_id = mobj.group('id') | 
					
						
							|  |  |  |         page_num = mobj.group('page_num') or '1' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         view_data = self._download_json( | 
					
						
							|  |  |  |             'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num), | 
					
						
							|  |  |  |             video_id) | 
					
						
							|  |  |  |         if 'error' in view_data: | 
					
						
							|  |  |  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         cid = view_data['cid'] | 
					
						
							|  |  |  |         title = unescapeHTML(view_data['title']) | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |         doc = self._download_xml( | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |             'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, | 
					
						
							|  |  |  |             cid, | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  |             'Downloading page %s/%s' % (page_num, view_data['pages']) | 
					
						
							| 
									
										
										
										
											2015-01-08 01:33:22 +06:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-05-27 04:23:21 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |         if xpath_text(doc, './result') == 'error': | 
					
						
							|  |  |  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True) | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |         entries = [] | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-21 08:24:05 +01:00
										 |  |  |         for durl in doc.findall('./durl'): | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |             size = xpath_text(durl, ['./filesize', './size']) | 
					
						
							| 
									
										
										
										
											2015-10-21 08:24:05 +01:00
										 |  |  |             formats = [{ | 
					
						
							| 
									
										
										
										
											2015-10-17 18:30:51 +01:00
										 |  |  |                 'url': durl.find('./url').text, | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |                 'filesize': int_or_none(size), | 
					
						
							| 
									
										
										
										
											2015-10-17 18:30:51 +01:00
										 |  |  |                 'ext': 'flv', | 
					
						
							| 
									
										
										
										
											2015-10-21 08:24:05 +01:00
										 |  |  |             }] | 
					
						
							|  |  |  |             backup_urls = durl.find('./backup_url') | 
					
						
							|  |  |  |             if backup_urls is not None: | 
					
						
							|  |  |  |                 for backup_url in backup_urls.findall('./url'): | 
					
						
							|  |  |  |                     formats.append({'url': backup_url.text}) | 
					
						
							|  |  |  |             formats.reverse() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  |             entries.append({ | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |                 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), | 
					
						
							| 
									
										
										
										
											2015-04-30 18:23:35 +08:00
										 |  |  |                 'title': title, | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |                 'duration': int_or_none(xpath_text(durl, './length'), 1000), | 
					
						
							| 
									
										
										
										
											2015-10-17 18:30:51 +01:00
										 |  |  |                 'formats': formats, | 
					
						
							| 
									
										
										
										
											2015-01-08 01:33:22 +06:00
										 |  |  |             }) | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |         info = { | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |             'id': compat_str(cid), | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |             'title': title, | 
					
						
							|  |  |  |             'description': view_data.get('description'), | 
					
						
							|  |  |  |             'thumbnail': view_data.get('pic'), | 
					
						
							|  |  |  |             'uploader': view_data.get('author'), | 
					
						
							|  |  |  |             'timestamp': int_or_none(view_data.get('created')), | 
					
						
							| 
									
										
										
										
											2015-12-03 22:43:19 +01:00
										 |  |  |             'view_count': int_or_none(view_data.get('play')), | 
					
						
							| 
									
										
										
										
											2015-12-03 22:01:32 +01:00
										 |  |  |             'duration': int_or_none(xpath_text(doc, './timelength')), | 
					
						
							| 
									
										
										
										
											2014-04-21 13:45:27 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if len(entries) == 1: | 
					
						
							|  |  |  |             entries[0].update(info) | 
					
						
							|  |  |  |             return entries[0] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             info.update({ | 
					
						
							|  |  |  |                 '_type': 'multi_video', | 
					
						
							| 
									
										
										
										
											2015-10-17 23:12:58 +01:00
										 |  |  |                 'id': video_id, | 
					
						
							| 
									
										
										
										
											2015-10-17 17:28:09 +01:00
										 |  |  |                 'entries': entries, | 
					
						
							|  |  |  |             }) | 
					
						
							|  |  |  |             return info |