2015-12-13 19:15:45 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# coding: utf-8  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-01-29 15:34:35 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2013-06-23 21:14:19 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								import  base64  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2015-12-13 19:15:45 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . . compat  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    compat_urllib_parse_unquote , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    compat_parse_qs , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . . utils  import  determine_ext  
						 
					
						
							
								
									
										
										
										
											2013-06-23 21:14:19 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class InfoQIE(InfoExtractor):
								    """Information extractor for pages on infoq.com.

								    URLs containing ``/cn/`` are resolved through the bokecc.com player;
								    all other URLs combine an RTMP format and an HTTP format that is
								    requested with CloudFront cookies scraped from the page.
								    """

								    _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'

								    _TESTS = [{
								        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
								        'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
								        'info_dict': {
								            'id': 'A-Few-of-My-Favorite-Python-Things',
								            'ext': 'mp4',
								            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
								            'title': 'A Few of My Favorite [Python] Things',
								        },
								    }, {
								        'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
								        'only_matching': True,
								    }, {
								        'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
								        'md5': '4918d0cca1497f2244572caf626687ef',
								        'info_dict': {
								            'id': 'openstack-continued-delivery',
								            'title': 'OpenStack持续交付之路',
								            'ext': 'flv',
								            'description': 'md5:308d981fb28fa42f49f9568322c683ff',
								        },
								    }]

								    def _extract_bokecc_videos(self, webpage, video_id):
								        """Build the format list for a page that embeds the bokecc.com player.

								        The query string of the player ``<script>`` URL supplies the
								        ``siteid`` and ``vid`` values used to request the playinfo XML; one
								        format is produced per ``./video/quality`` element of that XML.

								        Returns an empty list when the page has no bokecc player script.
								        """
								        # TODO: bokecc.com is a Chinese video cloud platform
								        # It should have an independent extractor but I don't have other
								        # examples using bokecc
								        player_params_str = self._html_search_regex(
								            r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
								            webpage, 'player params', default=None)
								        if not player_params_str:
								            # The lookup is non-fatal (default=None); without this guard the
								            # None would be handed to compat_parse_qs and fail there with an
								            # error unrelated to the real cause.
								            return []

								        player_params = compat_parse_qs(player_params_str)

								        info_xml = self._download_xml(
								            'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
								                player_params['siteid'][0], player_params['vid'][0]), video_id)

								        return [{
								            'format_id': 'bokecc',
								            'url': quality.find('./copy').attrib['playurl'],
								            'preference': int(quality.attrib['value']),
								        } for quality in info_xml.findall('./video/quality')]

								    def _extract_rtmp_videos(self, webpage):
								        """Build the RTMP format from the page's ``jsclassref`` value.

								        ``jsclassref`` is base64-decoded, UTF-8-decoded and URL-unquoted to
								        obtain the play path, which is prefixed with ``mp4:``.

								        Returns a one-element list, or an empty list when the page does not
								        define ``jsclassref``.
								        """
								        # The server URL is hardcoded
								        video_url = 'rtmpe://video.infoq.com/cfx/st/'

								        # Extract video URL
								        encoded_id = self._search_regex(
								            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
								        if not encoded_id:
								            # The lookup is non-fatal (default=None); calling .encode() on
								            # the None result would raise AttributeError.
								            return []

								        real_id = compat_urllib_parse_unquote(
								            base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
								        playpath = 'mp4:' + real_id

								        return [{
								            'format_id': 'rtmp',
								            'url': video_url,
								            'ext': determine_ext(playpath),
								            'play_path': playpath,
								        }]

								    def _extract_http_videos(self, webpage):
								        """Build the HTTP format together with its CloudFront cookie header.

								        The video URL is read from the ``P.s`` assignment in the page, and
								        the policy, signature and key-pair id from the
								        ``InfoQConstants.scp`` / ``.scs`` / ``.sck`` assignments. All four
								        lookups use the default (no ``default=``) form of ``_search_regex``.
								        """
								        http_video_url = self._search_regex(
								            r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')

								        # The dot is escaped so that only the literal "InfoQConstants.<name>"
								        # matches, in line with the P\.s pattern above.
								        policy = self._search_regex(
								            r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
								        signature = self._search_regex(
								            r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
								        key_pair_id = self._search_regex(
								            r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')

								        return [{
								            'format_id': 'http',
								            'url': http_video_url,
								            'http_headers': {
								                'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
								                    policy, signature, key_pair_id),
								            },
								        }]

								    def _real_extract(self, url):
								        """Download the page at *url* and return its info dict.

								        The dict carries ``id``, ``title``, ``description`` and ``formats``.
								        """
								        video_id = self._match_id(url)
								        webpage = self._download_webpage(url, video_id)

								        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
								        video_description = self._html_search_meta('description', webpage, 'description')

								        if '/cn/' in url:
								            # for China videos, HTTP video URL exists but always fails with 403
								            formats = self._extract_bokecc_videos(webpage, video_id)
								        else:
								            formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)

								        self._sort_formats(formats)

								        return {
								            'id': video_id,
								            'title': video_title,
								            'description': video_description,
								            'formats': formats,
								        }