Use _match_id() and fix URL recognition in Rutube

This commit is contained in:
luceatnobis 2017-07-06 11:27:14 +02:00
parent 7df3f4561e
commit e321a64cbd

View File

@ -7,6 +7,8 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
@ -18,7 +20,7 @@ from ..utils import (
class RutubeIE(InfoExtractor):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})(/)?$'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
@ -45,6 +47,10 @@ class RutubeIE(InfoExtractor):
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs claimed by RutubePlaylistIE are always rejected here; every other
    URL is judged by the inherited ``suitable`` implementation.
    """
    # Playlist URLs take precedence over single-video handling.
    if RutubePlaylistIE.suitable(url):
        return False
    return super(RutubeIE, cls).suitable(url)
@staticmethod
def _extract_urls(webpage):
return [mobj.group('url') for mobj in re.finditer(
@ -206,16 +212,20 @@ class RutubePlaylistIE(InfoExtractor):
'playlist_count': 25,
}]
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.+pl_id=(?P<pl_id>\d+).*$'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+).*$'
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s'
@classmethod
def suitable(cls, url):
    """Tell whether ``url`` is a Rutube playlist URL.

    Returns True only when the inherited ``suitable`` check accepts the URL
    (it is expected to test ``_VALID_URL`` -- confirm against the base
    class) and the query string carries a ``pl_id`` parameter whose first
    value consists solely of digits.
    """
    # Without this guard any URL, on any host, that merely carries a numeric
    # pl_id parameter would be claimed here and later fail in _match_id.
    if not super(RutubePlaylistIE, cls).suitable(url):
        return False
    params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
    pl_ids = params.get('pl_id')
    # Always hand back a real bool rather than None or a list.
    return bool(pl_ids) and pl_ids[0].isdigit()
def _real_extract(self, url):
    """Extract the playlist referenced by ``url``.

    The playlist id is obtained through ``_match_id`` and handed to
    ``_extract_playlist``, whose result is returned unchanged.
    """
    # The effective _VALID_URL names its capture group ``id``; a manual
    # re.match(...).group('pl_id') lookup would fail on the missing group.
    playlist_id = self._match_id(url)
    return self._extract_playlist(playlist_id)
def _extract_playlist(self, playlist_id):
import pdb
entries = []
for pagenum in itertools.count(1):
page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum)