[mixcloud] Simplify url extraction
On the tracks I tested, the server number in the URL from the webpage is valid for either the mp3 or the m4a file, and any other number is invalid, so it's a waste of time to check them.
This commit is contained in:
		
							parent
							
								
									d8e7ef04dc
								
							
						
					
					
						commit
						c5826a491b
					
				| @ -1,7 +1,6 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| import re | import re | ||||||
| import itertools |  | ||||||
| 
 | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
| @ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor): | |||||||
|         }, |         }, | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _get_url(self, track_id, template_url, server_number): |     def _check_url(self, url, track_id, ext): | ||||||
|         boundaries = (1, 30) |         try: | ||||||
|         for nr in server_numbers(server_number, boundaries): |             # We only want to know if the request succeed | ||||||
|             url = template_url % nr |             # don't download the whole file | ||||||
|             try: |             self._request_webpage( | ||||||
|                 # We only want to know if the request succeed |                 HEADRequest(url), track_id, | ||||||
|                 # don't download the whole file |                 'Trying %s URL' % ext) | ||||||
|                 self._request_webpage( |             return True | ||||||
|                     HEADRequest(url), track_id, |         except ExtractorError: | ||||||
|                     'Checking URL %d/%d ...' % (nr, boundaries[-1])) |             return False | ||||||
|                 return url |  | ||||||
|             except ExtractorError: |  | ||||||
|                 pass |  | ||||||
|         return None |  | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         mobj = re.match(self._VALID_URL, url) | ||||||
| @ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor): | |||||||
|         preview_url = self._search_regex( |         preview_url = self._search_regex( | ||||||
|             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') |             r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') | ||||||
|         song_url = preview_url.replace('/previews/', '/c/originals/') |         song_url = preview_url.replace('/previews/', '/c/originals/') | ||||||
|         server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number')) |         if not self._check_url(song_url, track_id, 'mp3'): | ||||||
|         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) |             song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') | ||||||
|         final_song_url = self._get_url(track_id, template_url, server_number) |             if not self._check_url(song_url, track_id, 'm4a'): | ||||||
|         if final_song_url is None: |                 raise ExtractorError('Unable to extract track url') | ||||||
|             self.to_screen('Trying with m4a extension') |  | ||||||
|             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') |  | ||||||
|             final_song_url = self._get_url(track_id, template_url, server_number) |  | ||||||
|         if final_song_url is None: |  | ||||||
|             raise ExtractorError('Unable to extract track url') |  | ||||||
| 
 | 
 | ||||||
|         PREFIX = ( |         PREFIX = ( | ||||||
|             r'm-play-on-spacebar[^>]+' |             r'm-play-on-spacebar[^>]+' | ||||||
| @ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor): | |||||||
|         return { |         return { | ||||||
|             'id': track_id, |             'id': track_id, | ||||||
|             'title': title, |             'title': title, | ||||||
|             'url': final_song_url, |             'url': song_url, | ||||||
|             'description': description, |             'description': description, | ||||||
|             'thumbnail': thumbnail, |             'thumbnail': thumbnail, | ||||||
|             'uploader': uploader, |             'uploader': uploader, | ||||||
| @ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor): | |||||||
|             'view_count': view_count, |             'view_count': view_count, | ||||||
|             'like_count': like_count, |             'like_count': like_count, | ||||||
|         } |         } | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def server_numbers(first, boundaries): |  | ||||||
|     """ Server numbers to try in descending order of probable availability. |  | ||||||
|     Starting from first (i.e. the number of the server hosting the preview file) |  | ||||||
|     and going further and further up to the higher boundary and down to the |  | ||||||
|     lower one in an alternating fashion. Namely: |  | ||||||
| 
 |  | ||||||
|         server_numbers(2, (1, 5)) |  | ||||||
| 
 |  | ||||||
|         # Where the preview server is 2, min number is 1 and max is 5. |  | ||||||
|         # Yields: 2, 3, 1, 4, 5 |  | ||||||
| 
 |  | ||||||
|     Why not random numbers or increasing sequences? Since from what I've seen, |  | ||||||
|     full length files seem to be hosted on servers whose number is closer to |  | ||||||
|     that of the preview; to be confirmed. |  | ||||||
|     """ |  | ||||||
|     zip_longest = getattr(itertools, 'zip_longest', None) |  | ||||||
|     if zip_longest is None: |  | ||||||
|         # python 2.x |  | ||||||
|         zip_longest = itertools.izip_longest |  | ||||||
| 
 |  | ||||||
|     if len(boundaries) != 2: |  | ||||||
|         raise ValueError("boundaries should be a two-element tuple") |  | ||||||
|     min, max = boundaries |  | ||||||
|     highs = range(first + 1, max + 1) |  | ||||||
|     lows = range(first - 1, min - 1, -1) |  | ||||||
|     rest = filter( |  | ||||||
|         None, itertools.chain.from_iterable(zip_longest(highs, lows))) |  | ||||||
|     yield first |  | ||||||
|     for n in rest: |  | ||||||
|         yield n |  | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user