2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								# coding: utf-8  
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								from  __future__  import  unicode_literals  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								import  hmac  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								import  hashlib  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								import  base64  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								from  . common  import  InfoExtractor  
						 
					
						
							
								
									
										
										
										
											2015-03-19 21:23:52 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								from  . . utils  import  (  
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    determine_ext , 
							 
						 
					
						
							
								
									
										
										
										
											2015-03-19 21:23:52 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    float_or_none , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    int_or_none , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    js_to_json , 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    mimetype2ext , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    parse_iso8601 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    remove_start , 
							 
						 
					
						
							
								
									
										
										
										
											2015-03-19 21:23:52 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								)  
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								class  NYTimesBaseIE ( InfoExtractor ) :  
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    _SECRET  =  b ' pX(2MbU2);4N { 7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    def  _extract_video_from_id ( self ,  video_id ) : 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        # Authorization generation algorithm is reverse engineered from `signer` in 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        path  =  ' /svc/video/api/v3/video/ '  +  video_id 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        hm  =  hmac . new ( self . _SECRET ,  ( path  +  ' :vhs ' ) . encode ( ) ,  hashlib . sha512 ) . hexdigest ( ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        video_data  =  self . _download_json ( ' http://www.nytimes.com '  +  path ,  video_id ,  ' Downloading video JSON ' ,  headers = { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' Authorization ' :  ' NYTV  '  +  base64 . b64encode ( hm . encode ( ) ) . decode ( ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' X-NYTV ' :  ' vhs ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        } ,  fatal = False ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        if  not  video_data : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            video_data  =  self . _download_json ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                ' http://www.nytimes.com/svc/video/api/v2/video/ '  +  video_id , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                video_id ,  ' Downloading video JSON ' ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        title  =  video_data [ ' headline ' ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 03:11:38 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        def  get_file_size ( file_size ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            if  isinstance ( file_size ,  int ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                return  file_size 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            elif  isinstance ( file_size ,  dict ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                return  int ( file_size . get ( ' value ' ,  0 ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            else : 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								                return  None 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        urls  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        formats  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        for  video  in  video_data . get ( ' renditions ' ,  [ ] ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            video_url  =  video . get ( ' url ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            format_id  =  video . get ( ' type ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            if  not  video_url  or  format_id  ==  ' thumbs '  or  video_url  in  urls : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                continue 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            urls . append ( video_url ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ext  =  mimetype2ext ( video . get ( ' mimetype ' ) )  or  determine_ext ( video_url ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            if  ext  ==  ' m3u8 ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                formats . extend ( self . _extract_m3u8_formats ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    video_url ,  video_id ,  ' mp4 ' ,  ' m3u8_native ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    m3u8_id = format_id  or  ' hls ' ,  fatal = False ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            elif  ext  ==  ' mpd ' : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                continue 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            #     formats.extend(self._extract_mpd_formats( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            #         video_url, video_id, format_id or 'dash', fatal=False)) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            else : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                formats . append ( { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' url ' :  video_url , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' format_id ' :  format_id , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' vcodec ' :  video . get ( ' videoencoding ' )  or  video . get ( ' video_codec ' ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' width ' :  int_or_none ( video . get ( ' width ' ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' height ' :  int_or_none ( video . get ( ' height ' ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' filesize ' :  get_file_size ( video . get ( ' file_size ' )  or  video . get ( ' fileSize ' ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' tbr ' :  int_or_none ( video . get ( ' bitrate ' ) ,  1000 ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                    ' ext ' :  ext , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                } ) 
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        self . _sort_formats ( formats ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        thumbnails  =  [ ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        for  image  in  video_data . get ( ' images ' ,  [ ] ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            image_url  =  image . get ( ' url ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            if  not  image_url : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                continue 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            thumbnails . append ( { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                ' url ' :  ' http://www.nytimes.com/ '  +  image_url , 
							 
						 
					
						
							
								
									
										
										
										
											2015-03-19 21:23:52 +06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								                ' width ' :  int_or_none ( image . get ( ' width ' ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                ' height ' :  int_or_none ( image . get ( ' height ' ) ) , 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            } ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        publication_date  =  video_data . get ( ' publication_date ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        timestamp  =  parse_iso8601 ( publication_date [ : - 8 ] )  if  publication_date  else  None 
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        return  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' id ' :  video_id , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' title ' :  title , 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' description ' :  video_data . get ( ' summary ' ) , 
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' timestamp ' :  timestamp , 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' uploader ' :  video_data . get ( ' byline ' ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' duration ' :  float_or_none ( video_data . get ( ' duration ' ) ,  1000 ) , 
							 
						 
					
						
							
								
									
										
										
										
											2014-05-03 02:28:38 +07:00 
										
									 
								 
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' formats ' :  formats , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' thumbnails ' :  thumbnails , 
							 
						 
					
						
							
								
									
										
										
										
											2014-11-23 20:41:03 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								class  NYTimesIE ( NYTimesBaseIE ) :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    _VALID_URL  =  r ' https?://(?:(?:www \ .)?nytimes \ .com/video/(?:[^/]+/)+?|graphics8 \ .nytimes \ .com/bcvideo/ \ d+(?: \ . \ d+)?/iframe/embed \ .html \ ?videoId=)(?P<id> \ d+) ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    _TESTS  =  [ { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' url ' :  ' http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2016-09-01 19:07:41 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        ' md5 ' :  ' d665342765db043f7e225cff19df0f2d ' , 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' id ' :  ' 100000002847155 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' ext ' :  ' mov ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' title ' :  ' Verbatim: What Is a Photocopier? ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' description ' :  ' md5:93603dada88ddbda9395632fdc5da260 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' timestamp ' :  1398631707 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' upload_date ' :  ' 20140427 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' uploader ' :  ' Brett Weiner ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' duration ' :  419 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' url ' :  ' http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' only_matching ' :  True , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    } ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    def  _real_extract ( self ,  url ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        video_id  =  self . _match_id ( url ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        return  self . _extract_video_from_id ( video_id ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								class  NYTimesArticleIE ( NYTimesBaseIE ) :  
						 
					
						
							
								
									
										
										
										
											2015-05-12 12:42:13 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    _VALID_URL  =  r ' https?://(?:www \ .)?nytimes \ .com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?: \ .html)? ' 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 23:00:09 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    _TESTS  =  [ { 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        ' url ' :  ' http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' md5 ' :  ' e2076d58b4da18e6a001d53fd56db3c9 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' id ' :  ' 100000003628438 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' ext ' :  ' mov ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' title ' :  ' New Minimum Wage: $70,000 a Year ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' description ' :  ' Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year. ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' timestamp ' :  1429033037 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' upload_date ' :  ' 20150414 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' uploader ' :  ' Matthew Williams ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' url ' :  ' http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' md5 ' :  ' e0d52040cafb07662acf3c9132db3575 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' id ' :  ' 100000004709062 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
										 
							
							
								            ' title ' :  ' The Run-Up: ‘ ’  ' , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' ext ' :  ' mp3 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' description ' :  ' md5:fb5c6b93b12efc51649b4847fe066ee4 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' series ' :  ' The Run-Up ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
										 
							
							
								            ' episode ' :  ' ‘ ’ ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' episode_number ' :  20 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' duration ' :  2130 , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' url ' :  ' http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' info_dict ' :  { 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' id ' :  ' 100000004709479 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' title ' :  ' The Rise of Hitler ' , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' ext ' :  ' mp3 ' , 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            ' description ' :  ' md5:bce877fd9e3444990cb141875fab0028 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' creator ' :  ' Pamela Paul ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' duration ' :  3475 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        } , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' params ' :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' skip_download ' :  True , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        } , 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 23:00:09 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    } ,  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' url ' :  ' http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1 ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        ' only_matching ' :  True , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								    } ] 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    def  _extract_podcast_from_json ( self ,  json ,  page_id ,  webpage ) : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        podcast_audio  =  self . _parse_json ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            json ,  page_id ,  transform_source = js_to_json ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        audio_data  =  podcast_audio [ ' data ' ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        track  =  audio_data [ ' track ' ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        episode_title  =  track [ ' title ' ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        video_url  =  track [ ' source ' ] 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        description  =  track . get ( ' description ' )  or  self . _html_search_meta ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            [ ' og:description ' ,  ' twitter:description ' ] ,  webpage ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        podcast_title  =  audio_data . get ( ' podcast ' ,  { } ) . get ( ' title ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        title  =  ( ' %s :  %s '  %  ( podcast_title ,  episode_title ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								                 if  podcast_title  else  episode_title ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        episode  =  audio_data . get ( ' podcast ' ,  { } ) . get ( ' episode ' )  or  ' ' 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        episode_number  =  int_or_none ( self . _search_regex ( 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-17 22:16:23 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								            r ' [Ee]pisode \ s+( \ d+) ' ,  episode ,  ' episode number ' ,  default = None ) ) 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        return  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' id ' :  remove_start ( podcast_audio . get ( ' target ' ) ,  ' FT ' )  or  page_id , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' url ' :  video_url , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' title ' :  title , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' description ' :  description , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' creator ' :  track . get ( ' credit ' ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' series ' :  podcast_title , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' episode ' :  episode_title , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' episode_number ' :  episode_number , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ' duration ' :  int_or_none ( track . get ( ' duration ' ) ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								    def  _real_extract ( self ,  url ) : 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        page_id  =  self . _match_id ( url ) 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        webpage  =  self . _download_webpage ( url ,  page_id ) 
							 
						 
					
						
							
								
									
										
										
										
											2015-05-04 22:32:57 +08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        video_id  =  self . _search_regex ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            r ' data-videoid=[ " \' ]( \ d+) ' ,  webpage ,  ' video id ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            default = None ,  fatal = False ) 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-14 22:16:43 -04:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								        if  video_id  is  not  None : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            return  self . _extract_video_from_id ( video_id ) 
							 
						 
					
						
							
								
									
										
										
										
											2016-10-16 18:21:02 +07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
								
									
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        podcast_data  =  self . _search_regex ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            ( r ' NYTD \ .FlexTypes \ .push \ s* \ ( \ s*( { .+?}) \ s* \ ) \ s*; \ s*</script ' , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								             r ' NYTD \ .FlexTypes \ .push \ s* \ ( \ s*( { .+}) \ s* \ ) \ s*; ' ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								            webpage ,  ' podcast data ' ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
								
									
								 
							
							
								        return  self . _extract_podcast_from_json ( podcast_data ,  page_id ,  webpage )