[extractor/common] Allow angle brackets in attributes in _og_regexes (#7215)
This commit is contained in:
		
							parent
							
								
									49941c4e4f
								
							
						
					
					
						commit
						448ef1f31c
					
				@ -37,12 +37,16 @@ class TestInfoExtractor(unittest.TestCase):
 | 
			
		||||
            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
 | 
			
		||||
            <meta content='application/x-shockwave-flash' property='og:video:type'>
 | 
			
		||||
            <meta content='Foo' property=og:foobar>
 | 
			
		||||
            <meta name="og:test1" content='foo > < bar'/>
 | 
			
		||||
            <meta name="og:test2" content="foo >//< bar"/>
 | 
			
		||||
            '''
 | 
			
		||||
        self.assertEqual(ie._og_search_title(html), 'Foo')
 | 
			
		||||
        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
 | 
			
		||||
        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 | 
			
		||||
        self.assertEqual(ie._og_search_video_url(html, default=None), None)
 | 
			
		||||
        self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
 | 
			
		||||
        self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
 | 
			
		||||
        self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
 | 
			
		||||
 | 
			
		||||
    def test_html_search_meta(self):
 | 
			
		||||
        ie = self.ie
 | 
			
		||||
 | 
			
		||||
@ -645,7 +645,7 @@ class InfoExtractor(object):
 | 
			
		||||
    # Helper functions for extracting OpenGraph info
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _og_regexes(prop):
 | 
			
		||||
        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
 | 
			
		||||
        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
 | 
			
		||||
        property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
 | 
			
		||||
                       % {'prop': re.escape(prop)})
 | 
			
		||||
        template = r'<meta[^>]+?%s[^>]+?%s'
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user