2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from __future__ import unicode_literals
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import itertools
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import re
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from .common import SearchInfoExtractor
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from ..utils import (
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    compat_urllib_parse,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class GoogleSearchIE(SearchInfoExtractor):
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    IE_DESC = 'Google Video search'
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    _MAX_RESULTS = 1000
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    IE_NAME = 'video.google:search'
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    _SEARCH_KEY = 'gvsearch'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _get_n_results(self, query, n):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """Get a specified number of results for a query"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        entries = []
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        res = {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            '_type': 'playlist',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'id': query,
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            'title': query,
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        for pagenum in itertools.count():
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            result_url = (
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                % (compat_urllib_parse.quote_plus(query), pagenum * 10))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            webpage = self._download_webpage(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                result_url, 'gvsearch:' + query,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                note='Downloading result page ' + str(pagenum + 1))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            for hit_idx, mobj in enumerate(re.finditer(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    r'<h3 class="r"><a href="([^"]+)"', webpage)):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # Skip playlists
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    continue
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                entries.append({
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    '_type': 'url',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    'url': mobj.group(1)
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                })
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2014-03-13 16:52:13 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
							 | 
						
					
						
							
								
									
										
										
										
											2014-02-06 03:29:10 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                res['entries'] = entries[:n]
							 | 
						
					
						
							
								
									
										
										
										
											2013-06-23 20:32:49 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                return res
							 |