[chirbit] add profile extractor.
This commit is contained in:
		
							parent
							
								
									5da6bd0083
								
							
						
					
					
						commit
						365577f567
					
				| @ -63,7 +63,7 @@ from .ccc import CCCIE | |||||||
| from .ceskatelevize import CeskaTelevizeIE | from .ceskatelevize import CeskaTelevizeIE | ||||||
| from .channel9 import Channel9IE | from .channel9 import Channel9IE | ||||||
| from .chilloutzone import ChilloutzoneIE | from .chilloutzone import ChilloutzoneIE | ||||||
| from .chirbit import ChirbitIE | from .chirbit import ChirbitIE, ChirbitProfileIE | ||||||
| from .cinchcast import CinchcastIE | from .cinchcast import CinchcastIE | ||||||
| from .clipfish import ClipfishIE | from .clipfish import ClipfishIE | ||||||
| from .cliphunter import CliphunterIE | from .cliphunter import CliphunterIE | ||||||
|  | |||||||
| @ -1,7 +1,10 @@ | |||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
|  | import re | ||||||
|  | 
 | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import clean_html | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class ChirbitIE(InfoExtractor): | class ChirbitIE(InfoExtractor): | ||||||
| @ -32,3 +35,63 @@ class ChirbitIE(InfoExtractor): | |||||||
|             'title': audio_title, |             'title': audio_title, | ||||||
|             'url': audio_url |             'url': audio_url | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
class ChirbitProfileIE(InfoExtractor):
    """Extract all audio entries listed on a chirbit.com profile as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'playlist_count': 3,
        'info_dict': {
            '_type': 'playlist',
            'title': 'ScarletBeauty',
            'id': 'ScarletBeauty'
        }
    }

    # Page number requested up front to discover the profile's oldest entry.
    # NOTE(review): presumably chosen to exceed any real page count so that
    # the site answers with its last page -- confirm against the site.
    _OLDEST_PAGE_PROBE = 24599

    # Captures the audio id out of the player's "soundFile" URL.
    _AUDIO_ID_RE = r'soundFile:\s*"http://audio\.chirbit\.com/(.*?)\.mp3"'
    # Captures the title shown for each entry on a profile page.
    _TITLE_RE = r'<div\s+class="chirbit_title"\s*>(.*?)</div>'

    def _real_extract(self, url):
        """Return a playlist info dict for the profile at *url*.

        Profile pages are fetched as ``<url>/<page number>`` starting at 0.
        Paging stops once the page containing the profile's oldest entry has
        been processed, or when a page contributes no new audio ids (which
        also guarantees termination if the oldest entry is never found).
        """
        profile_id = self._match_id(url)
        # Avoid producing "//<page>" when the URL ends with a slash.
        base_url = url.rstrip('/')

        # Chirbit has a pretty weird "Last Page" navigation behavior.
        # We grab the profile's oldest entry to determine when to
        # stop fetching entries.
        oldest_page = self._download_webpage(
            '%s/%d' % (base_url, self._OLDEST_PAGE_PROBE), profile_id)
        oldest_page_ids = re.findall(self._AUDIO_ID_RE, oldest_page)
        # A profile without audio has no oldest entry; None never matches
        # an id below, so the "no new ids" rule ends the loop instead.
        oldest_entry = (
            clean_html(oldest_page_ids[-1]) if oldest_page_ids else None)

        entries = []
        seen_ids = set()
        page_num = 0
        while True:
            page = self._download_webpage(
                '%s/%d' % (base_url, page_num), profile_id)
            page_ids = re.findall(self._AUDIO_ID_RE, page)
            page_titles = re.findall(self._TITLE_RE, page)

            new_on_page = 0
            for index, audio_id in enumerate(page_ids):
                if audio_id in seen_ids:
                    continue
                seen_ids.add(audio_id)
                new_on_page += 1
                # Titles are paired with ids by position on the same page;
                # fall back to the id if the page yields fewer titles.
                if index < len(page_titles):
                    title = page_titles[index]
                else:
                    title = audio_id
                entries.append({
                    'id': audio_id,
                    'title': title,
                    'url': 'http://audio.chirbit.com/' + audio_id + '.mp3'
                })

            if oldest_entry in page_ids or not new_on_page:
                break
            page_num += 1

        return {
            '_type': 'playlist',
            'id': profile_id,
            'title': profile_id,
            'entries': entries
        }
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user