2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								import  hmac  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  hashlib  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import  base64  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2015-03-19 21:23:52 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    float_or_none , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    int_or_none , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    parse_iso8601 , 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								    mimetype2ext , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    determine_ext , 
							 
						 
					
						
							
								
									
										
										
										
											2015-03-19 21:23:52 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class NYTimesBaseIE(InfoExtractor):
								    """Shared NYTimes logic: fetch a video's JSON by id and map it to an info dict."""

								    # Key for the HMAC-SHA512 signature sent with v3 video API requests.
								    _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'

								    def _extract_video_from_id(self, video_id):
								        """Download the metadata JSON for ``video_id`` and build the result dict.

								        The signed v3 endpoint is queried first; when that yields nothing the
								        v2 endpoint is queried without the extra headers.  The returned dict
								        carries id, title, description, timestamp, uploader, duration,
								        formats and thumbnails.  A missing ``headline`` key raises ``KeyError``.
								        """
								        # Authorization generation algorithm is reverse engineered from `signer` in
								        # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
								        api_path = '/svc/video/api/v3/video/' + video_id
								        signed_payload = (api_path + ':vhs').encode()
								        signature = hmac.new(self._SECRET, signed_payload, hashlib.sha512).hexdigest()
								        auth_headers = {
								            'Authorization': 'NYTV ' + base64.b64encode(signature.encode()).decode(),
								            'X-NYTV': 'vhs',
								        }
								        video_data = self._download_json(
								            'http://www.nytimes.com' + api_path, video_id,
								            'Downloading video JSON', headers=auth_headers, fatal=False)
								        if not video_data:
								            # Nothing usable came back from v3: retry against the v2 endpoint.
								            video_data = self._download_json(
								                'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
								                video_id, 'Downloading video JSON')

								        title = video_data['headline']

								        def get_file_size(raw_size):
								            # Accept a bare int, or a dict whose 'value' entry is converted
								            # with int(); anything else gives None.
								            if isinstance(raw_size, dict):
								                return int(raw_size.get('value', 0))
								            return raw_size if isinstance(raw_size, int) else None

								        seen_urls = []
								        formats = []
								        for rendition in video_data.get('renditions', []):
								            rendition_url = rendition.get('url')
								            format_id = rendition.get('type')
								            # Skip entries without a URL and the 'thumbs' rendition type.
								            if not rendition_url or format_id == 'thumbs':
								                continue
								            # Each distinct URL is processed only once.
								            if rendition_url in seen_urls:
								                continue
								            seen_urls.append(rendition_url)
								            ext = mimetype2ext(rendition.get('mimetype')) or determine_ext(rendition_url)
								            if ext == 'mpd':
								                # DASH renditions are skipped; extraction through
								                # self._extract_mpd_formats(video_url, video_id,
								                # format_id or 'dash', fatal=False) is left disabled.
								                continue
								            if ext == 'm3u8':
								                formats.extend(self._extract_m3u8_formats(
								                    rendition_url, video_id, 'mp4', 'm3u8_native',
								                    m3u8_id=format_id or 'hls', fatal=False))
								                continue
								            # The two alternative key spellings below are tried in order.
								            formats.append({
								                'url': rendition_url,
								                'format_id': format_id,
								                'vcodec': rendition.get('videoencoding') or rendition.get('video_codec'),
								                'width': int_or_none(rendition.get('width')),
								                'height': int_or_none(rendition.get('height')),
								                'filesize': get_file_size(rendition.get('file_size') or rendition.get('fileSize')),
								                'tbr': int_or_none(rendition.get('bitrate'), 1000),
								                'ext': ext,
								            })
								        self._sort_formats(formats)

								        thumbnails = []
								        for image in video_data.get('images', []):
								            image_url = image.get('url')
								            if image_url:
								                # Image URLs are appended to the site root.
								                thumbnails.append({
								                    'url': 'http://www.nytimes.com/' + image_url,
								                    'width': int_or_none(image.get('width')),
								                    'height': int_or_none(image.get('height')),
								                })

								        publication_date = video_data.get('publication_date')
								        timestamp = None
								        if publication_date:
								            # The trailing 8 characters are cut off before ISO 8601 parsing.
								            timestamp = parse_iso8601(publication_date[:-8])

								        return {
								            'id': video_id,
								            'title': title,
								            'description': video_data.get('summary'),
								            'timestamp': timestamp,
								            'uploader': video_data.get('byline'),
								            'duration': float_or_none(video_data.get('duration'), 1000),
								            'formats': formats,
								            'thumbnails': thumbnails,
								        }
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class NYTimesIE(NYTimesBaseIE):
								    """Extractor for NYTimes video pages and graphics8 iframe embeds."""

								    # Two URL shapes are accepted; both end in the numeric video id.
								    _VALID_URL = (
								        r'https?://(?:'
								        r'(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|'
								        r'graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId='
								        r')(?P<id>\d+)'
								    )

								    _TESTS = [
								        {
								            'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
								            'md5': 'd665342765db043f7e225cff19df0f2d',
								            'info_dict': {
								                'id': '100000002847155',
								                'ext': 'mov',
								                'title': 'Verbatim: What Is a Photocopier?',
								                'description': 'md5:93603dada88ddbda9395632fdc5da260',
								                'timestamp': 1398631707,
								                'upload_date': '20140427',
								                'uploader': 'Brett Weiner',
								                'duration': 419,
								            },
								        },
								        {
								            'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
								            'only_matching': True,
								        },
								    ]

								    def _real_extract(self, url):
								        """Take the numeric id from ``url`` and delegate to the shared extraction."""
								        return self._extract_video_from_id(self._match_id(url))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class NYTimesArticleIE(NYTimesBaseIE):
								    """Extractor for NYTimes article pages that embed a video."""

								    # The captured 'id' here is the last path component up to the first dot,
								    # not the numeric video id; that one is read from the page.
								    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'

								    _TESTS = [
								        {
								            'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
								            'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
								            'info_dict': {
								                'id': '100000003628438',
								                'ext': 'mov',
								                'title': 'New Minimum Wage: $70,000 a Year',
								                'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
								                'timestamp': 1429033037,
								                'upload_date': '20150414',
								                'uploader': 'Matthew Williams',
								            },
								        },
								        {
								            'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
								            'only_matching': True,
								        },
								    ]

								    def _real_extract(self, url):
								        """Download the article, read its ``data-videoid`` value and extract that video."""
								        page_slug = self._match_id(url)
								        article_html = self._download_webpage(url, page_slug)
								        # The numeric id used for the actual extraction comes from the markup.
								        embedded_id = self._html_search_regex(
								            r'data-videoid="(\d+)"', article_html, 'video id')
								        return self._extract_video_from_id(embedded_id)