[fivemin] improve extraction
- skip m3u8 formats (404 error) - skip the unavailable test - download the embed page only when it's needed - update the _VALID_URL regex (joystiq.com redirects to engadget.com)
This commit is contained in:
		
							parent
							
								
									e1dd521e49
								
							
						
					
					
						commit
						6d6536acb2
					
				@ -1,5 +1,7 @@
 | 
				
			|||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .common import InfoExtractor
 | 
					from .common import InfoExtractor
 | 
				
			||||||
from ..compat import (
 | 
					from ..compat import (
 | 
				
			||||||
    compat_urllib_parse,
 | 
					    compat_urllib_parse,
 | 
				
			||||||
@ -16,12 +18,7 @@ from ..utils import (
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class FiveMinIE(InfoExtractor):
 | 
					class FiveMinIE(InfoExtractor):
 | 
				
			||||||
    IE_NAME = '5min'
 | 
					    IE_NAME = '5min'
 | 
				
			||||||
    _VALID_URL = r'''(?x)
 | 
					    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
 | 
				
			||||||
        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
 | 
					 | 
				
			||||||
            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
 | 
					 | 
				
			||||||
            5min:)
 | 
					 | 
				
			||||||
        (?P<id>\d+)
 | 
					 | 
				
			||||||
        '''
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    _TESTS = [
 | 
					    _TESTS = [
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
 | 
				
			|||||||
                'title': 'How to Make a Next-Level Fruit Salad',
 | 
					                'title': 'How to Make a Next-Level Fruit Salad',
 | 
				
			||||||
                'duration': 184,
 | 
					                'duration': 184,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
 | 
					            'skip': 'no longer available',
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    _ERRORS = {
 | 
					    _ERRORS = {
 | 
				
			||||||
@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        video_id = self._match_id(url)
 | 
					        mobj = re.match(self._VALID_URL, url)
 | 
				
			||||||
 | 
					        video_id = mobj.group('id')
 | 
				
			||||||
 | 
					        sid = mobj.group('sid')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if mobj.group('query'):
 | 
				
			||||||
 | 
					            qs = compat_parse_qs(mobj.group('query'))
 | 
				
			||||||
 | 
					            if not qs.get('playList'):
 | 
				
			||||||
 | 
					                raise ExtractorError('Invalid URL', expected=True)
 | 
				
			||||||
 | 
					            video_id = qs['playList'][0]
 | 
				
			||||||
 | 
					            if qs.get('sid'):
 | 
				
			||||||
 | 
					                sid = qs['sid'][0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
 | 
					        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
 | 
				
			||||||
        embed_page = self._download_webpage(embed_url, video_id,
 | 
					        if not sid:
 | 
				
			||||||
                                            'Downloading embed page')
 | 
					            embed_page = self._download_webpage(embed_url, video_id,
 | 
				
			||||||
        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
 | 
					                                                'Downloading embed page')
 | 
				
			||||||
        query = compat_urllib_parse.urlencode({
 | 
					            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
 | 
				
			||||||
            'func': 'GetResults',
 | 
					
 | 
				
			||||||
            'playlist': video_id,
 | 
					 | 
				
			||||||
            'sid': sid,
 | 
					 | 
				
			||||||
            'isPlayerSeed': 'true',
 | 
					 | 
				
			||||||
            'url': embed_url,
 | 
					 | 
				
			||||||
        })
 | 
					 | 
				
			||||||
        response = self._download_json(
 | 
					        response = self._download_json(
 | 
				
			||||||
            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
 | 
					            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
 | 
				
			||||||
 | 
					            compat_urllib_parse.urlencode({
 | 
				
			||||||
 | 
					                'func': 'GetResults',
 | 
				
			||||||
 | 
					                'playlist': video_id,
 | 
				
			||||||
 | 
					                'sid': sid,
 | 
				
			||||||
 | 
					                'isPlayerSeed': 'true',
 | 
				
			||||||
 | 
					                'url': embed_url,
 | 
				
			||||||
 | 
					            }),
 | 
				
			||||||
            video_id)
 | 
					            video_id)
 | 
				
			||||||
        if not response['success']:
 | 
					        if not response['success']:
 | 
				
			||||||
            raise ExtractorError(
 | 
					            raise ExtractorError(
 | 
				
			||||||
@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
 | 
				
			|||||||
        parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
 | 
					        parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
 | 
				
			||||||
            compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
 | 
					            compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
 | 
				
			||||||
        for rendition in info['Renditions']:
 | 
					        for rendition in info['Renditions']:
 | 
				
			||||||
            if rendition['RenditionType'] == 'm3u8':
 | 
					            if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
 | 
				
			||||||
                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
 | 
					 | 
				
			||||||
            elif rendition['RenditionType'] == 'aac':
 | 
					 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
 | 
					                rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user