2014-08-15 21:38:41 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2014-12-12 04:23:34 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    find_xpath_attr , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    int_or_none , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    js_to_json , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    unescapeHTML , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2014-08-15 21:38:41 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class HowStuffWorksIE(InfoExtractor):
								    """Information extractor for ``*-video.htm`` pages on howstuffworks.com.

								    ``_VALID_URL`` matches any howstuffworks.com subdomain and captures the
								    page slug (without an optional leading numeric prefix and without the
								    ``-video.htm`` suffix) as ``id``; that slug is used as the display id.
								    """

								    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
								    _TESTS = [
								        {
								            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
								            'info_dict': {
								                'id': '450221',
								                'ext': 'flv',
								                'title': 'Cool Jobs - Iditarod Musher',
								                'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
								                'display_id': 'cool-jobs-iditarod-musher',
								                # Raw string: same value as before, but "\." is no longer
								                # an invalid escape sequence in a non-raw literal.
								                'thumbnail': r're:^https?://.*\.jpg$',
								                'duration': 161,
								            },
								        },
								        {
								            'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
								            'info_dict': {
								                'id': '453464',
								                'ext': 'mp4',
								                'title': 'Survival Zone: Food and Water In the Savanna',
								                'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
								                'display_id': 'survival-zone-food-and-water-in-the-savanna',
								                'thumbnail': r're:^https?://.*\.jpg$',
								            },
								        },
								        {
								            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
								            'info_dict': {
								                'id': '440011',
								                'ext': 'flv',
								                'title': 'Sword Swallowing #1 by Dan Meyer',
								                'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
								                'display_id': 'sword-swallowing-1-by-dan-meyer',
								                'thumbnail': r're:^https?://.*\.jpg$',
								            },
								        },
								        {
								            'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
								            'only_matching': True,
								        }
								    ]

								    def _real_extract(self, url):
								        """Build the info dict for a single HowStuffWorks video page.

								        The page at ``url`` is downloaded and the JavaScript ``clip`` object
								        embedded in it is parsed.  Formats come from the clip's ``m3u8`` and
								        ``mp4`` entries; only when neither yields anything is the SMIL
								        manifest for the clip downloaded and used instead.

								        ``content_id`` and ``clip_title`` are read with plain key lookups,
								        so a clip object lacking either one is an error.  All other clip
								        fields are optional.

								        Returns a dict with the keys ``id``, ``display_id``, ``title``,
								        ``description``, ``thumbnail``, ``duration`` and ``formats``.
								        """
								        display_id = self._match_id(url)
								        webpage = self._download_webpage(url, display_id)
								        clip_js = self._search_regex(
								            r'(?s)var clip = ({.*?});', webpage, 'clip info')
								        clip_info = self._parse_json(
								            clip_js, display_id, transform_source=js_to_json)

								        video_id = clip_info['content_id']
								        formats = []

								        m3u8_url = clip_info.get('m3u8')
								        if m3u8_url:
								            formats.extend(
								                self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'))

								        for video in clip_info.get('mp4', []):
								            bitrate = video['bitrate']
								            formats.append({
								                'url': video['src'],
								                'format_id': bitrate,
								                # The bitrate label carries a trailing "k"; int_or_none is
								                # used (as in the SMIL branch below) instead of a bare
								                # int() so an empty label does not abort extraction.
								                'vbr': int_or_none(bitrate.rstrip('k')),
								            })

								        if not formats:
								            # Fallback: neither m3u8 nor direct mp4 entries were present.
								            smil = self._download_xml(
								                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
								                video_id, 'Downloading video SMIL')

								            smil_ns = '{http://www.w3.org/2001/SMIL20/Language}'
								            url_suffix = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'

								            base_meta = find_xpath_attr(
								                smil,
								                './{0}head/{0}meta'.format(smil_ns),
								                'name',
								                'httpBase')
								            # Compare against None explicitly: an XML element without
								            # children is falsy, and a missing element must not raise
								            # AttributeError on .get().
								            http_base = (
								                base_meta.get('content') if base_meta is not None else None)

								            # Without a base URL no usable media URL can be built, so the
								            # SMIL entries are skipped rather than emitted as "None/...".
								            smil_videos = smil.findall(
								                './{0}body/{0}switch/{0}video'.format(smil_ns)) if http_base else []

								            for video in smil_videos:
								                src = video.get('src')
								                if not src:
								                    continue
								                vbr = int_or_none(video.get('system-bitrate'), scale=1000)
								                formats.append({
								                    'url': '%s/%s%s' % (http_base, src, url_suffix),
								                    # '%dk' % None would raise TypeError.
								                    'format_id': ('%dk' % vbr) if vbr is not None else None,
								                    'vbr': vbr,
								                })

								        self._sort_formats(formats)

								        return {
								            'id': '%s' % video_id,
								            'display_id': display_id,
								            'title': unescapeHTML(clip_info['clip_title']),
								            'description': unescapeHTML(clip_info.get('caption')),
								            'thumbnail': clip_info.get('video_still_url'),
								            # Normalised so a string duration in the page JSON still
								            # yields an integer number of seconds.
								            'duration': int_or_none(clip_info.get('duration')),
								            'formats': formats,
								        }