Merge branch 'master' into GoogleDrive-issue-13619
This commit is contained in:
commit
90793eef1d
@ -1,3 +1,10 @@
|
|||||||
|
version <unreleased>
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youku:show] Fix playlist extraction (#13248)
|
||||||
|
+ [dispeak] Recognize sevt subdomain (#13276)
|
||||||
|
|
||||||
|
|
||||||
version 2017.07.15
|
version 2017.07.15
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -41,6 +41,7 @@ def _make_result(formats, **kwargs):
|
|||||||
'id': 'testid',
|
'id': 'testid',
|
||||||
'title': 'testttitle',
|
'title': 'testttitle',
|
||||||
'extractor': 'testex',
|
'extractor': 'testex',
|
||||||
|
'extractor_key': 'TestEx',
|
||||||
}
|
}
|
||||||
res.update(**kwargs)
|
res.update(**kwargs)
|
||||||
return res
|
return res
|
||||||
@ -761,7 +762,8 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'foo2:',
|
'url': 'foo2:',
|
||||||
'ie_key': 'Foo2',
|
'ie_key': 'Foo2',
|
||||||
'title': 'foo1 title'
|
'title': 'foo1 title',
|
||||||
|
'id': 'foo1_id',
|
||||||
}
|
}
|
||||||
|
|
||||||
class Foo2IE(InfoExtractor):
|
class Foo2IE(InfoExtractor):
|
||||||
@ -787,6 +789,9 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['url'], TEST_URL)
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
self.assertEqual(downloaded['title'], 'foo1 title')
|
self.assertEqual(downloaded['title'], 'foo1 title')
|
||||||
|
self.assertEqual(downloaded['id'], 'testid')
|
||||||
|
self.assertEqual(downloaded['extractor'], 'testex')
|
||||||
|
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -860,7 +860,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
force_properties = dict(
|
force_properties = dict(
|
||||||
(k, v) for k, v in ie_result.items() if v is not None)
|
(k, v) for k, v in ie_result.items() if v is not None)
|
||||||
for f in ('_type', 'url', 'ie_key'):
|
for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
|
||||||
if f in force_properties:
|
if f in force_properties:
|
||||||
del force_properties[f]
|
del force_properties[f]
|
||||||
new_result = info.copy()
|
new_result = info.copy()
|
||||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DigitallySpeakingIE(InfoExtractor):
|
class DigitallySpeakingIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
|
_VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
|
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
|
||||||
@ -28,6 +28,10 @@ class DigitallySpeakingIE(InfoExtractor):
|
|||||||
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
|
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
|
||||||
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
|
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
||||||
|
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_mp4(self, metadata):
|
def _parse_mp4(self, metadata):
|
||||||
|
@ -2,6 +2,11 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class EggheadCourseIE(InfoExtractor):
|
class EggheadCourseIE(InfoExtractor):
|
||||||
@ -33,3 +38,47 @@ class EggheadCourseIE(InfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, course.get('title'),
|
entries, playlist_id, course.get('title'),
|
||||||
course.get('description'))
|
course.get('description'))
|
||||||
|
|
||||||
|
|
||||||
|
class EggheadLessonIE(InfoExtractor):
|
||||||
|
IE_DESC = 'egghead.io lesson'
|
||||||
|
IE_NAME = 'egghead:lesson'
|
||||||
|
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'fv5yotjxcg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Create linear data flow with container style types (Box)',
|
||||||
|
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
|
||||||
|
'thumbnail': r're:^https?:.*\.jpg$',
|
||||||
|
'timestamp': 1481296768,
|
||||||
|
'upload_date': '20161209',
|
||||||
|
'duration': 304,
|
||||||
|
'view_count': 0,
|
||||||
|
'tags': ['javascript', 'free'],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lesson_id = self._match_id(url)
|
||||||
|
|
||||||
|
lesson = self._download_json(
|
||||||
|
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': 'Wistia',
|
||||||
|
'url': 'wistia:%s' % lesson['wistia_id'],
|
||||||
|
'id': lesson['wistia_id'],
|
||||||
|
'title': lesson.get('title'),
|
||||||
|
'description': lesson.get('summary'),
|
||||||
|
'thumbnail': lesson.get('thumb_nail'),
|
||||||
|
'timestamp': unified_timestamp(lesson.get('published_at')),
|
||||||
|
'duration': int_or_none(lesson.get('duration')),
|
||||||
|
'view_count': int_or_none(lesson.get('plays_count')),
|
||||||
|
'tags': try_get(lesson, lambda x: x['tag_list'], list),
|
||||||
|
}
|
||||||
|
@ -298,7 +298,10 @@ from .dw import (
|
|||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .echomsk import EchoMskIE
|
from .echomsk import EchoMskIE
|
||||||
from .egghead import EggheadCourseIE
|
from .egghead import (
|
||||||
|
EggheadCourseIE,
|
||||||
|
EggheadLessonIE,
|
||||||
|
)
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
from .einthusan import EinthusanIE
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FunnyOrDieIE(InfoExtractor):
|
class FunnyOrDieIE(InfoExtractor):
|
||||||
@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||||
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
||||||
'thumbnail': r're:^http:.*\.jpg$',
|
'thumbnail': r're:^http:.*\.jpg$',
|
||||||
|
'uploader': 'DASjr',
|
||||||
|
'timestamp': 1317904928,
|
||||||
|
'upload_date': '20111006',
|
||||||
|
'duration': 318.3,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||||
@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'title': 'Please Use This Song (Jon Lajoie)',
|
'title': 'Please Use This Song (Jon Lajoie)',
|
||||||
'description': 'Please use this to sell something. www.jonlajoie.com',
|
'description': 'Please use this to sell something. www.jonlajoie.com',
|
||||||
'thumbnail': r're:^http:.*\.jpg$',
|
'thumbnail': r're:^http:.*\.jpg$',
|
||||||
|
'timestamp': 1398988800,
|
||||||
|
'upload_date': '20140502',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'url': 'http://www.funnyordie.com%s' % src,
|
'url': 'http://www.funnyordie.com%s' % src,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
post_json = self._search_regex(
|
timestamp = unified_timestamp(self._html_search_meta(
|
||||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
'uploadDate', webpage, 'timestamp', default=None))
|
||||||
post = json.loads(post_json)
|
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
|
||||||
|
webpage, 'uploader', default=None)
|
||||||
|
|
||||||
|
title, description, thumbnail, duration = [None] * 4
|
||||||
|
|
||||||
|
medium = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
|
||||||
|
default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if medium:
|
||||||
|
title = medium.get('title')
|
||||||
|
duration = float_or_none(medium.get('duration'))
|
||||||
|
if not timestamp:
|
||||||
|
timestamp = unified_timestamp(medium.get('publishDate'))
|
||||||
|
|
||||||
|
post = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
|
||||||
|
default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if post:
|
||||||
|
if not title:
|
||||||
|
title = post.get('name')
|
||||||
|
description = post.get('description')
|
||||||
|
thumbnail = post.get('picture')
|
||||||
|
|
||||||
|
if not title:
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
if not description:
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
if not duration:
|
||||||
|
duration = int_or_none(self._html_search_meta(
|
||||||
|
('video:duration', 'duration'), webpage, 'duration', default=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': post['name'],
|
'title': title,
|
||||||
'description': post.get('description'),
|
'description': description,
|
||||||
'thumbnail': post.get('picture'),
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@ -1569,27 +1569,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# Nexx iFrame embed
|
|
||||||
{
|
|
||||||
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '161464',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Nervenkitzel Achterbahn',
|
|
||||||
'alt_title': 'Karussellbauer in Deutschland',
|
|
||||||
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
|
||||||
'release_year': 2005,
|
|
||||||
'creator': 'SPIEGEL TV',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 2761,
|
|
||||||
'timestamp': 1394021479,
|
|
||||||
'upload_date': '20140305',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'format': 'bestvideo',
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Facebook <iframe> embed
|
# Facebook <iframe> embed
|
||||||
{
|
{
|
||||||
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
||||||
|
@ -122,6 +122,26 @@ class SpiegelArticleIE(InfoExtractor):
|
|||||||
|
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
|
}, {
|
||||||
|
# Nexx iFrame embed
|
||||||
|
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '161464',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nervenkitzel Achterbahn',
|
||||||
|
'alt_title': 'Karussellbauer in Deutschland',
|
||||||
|
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
||||||
|
'release_year': 2005,
|
||||||
|
'creator': 'SPIEGEL TV',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 2761,
|
||||||
|
'timestamp': 1394021479,
|
||||||
|
'upload_date': '20140305',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -8,6 +8,9 @@ from ..utils import extract_attributes
|
|||||||
|
|
||||||
|
|
||||||
class TBSIE(TurnerBaseIE):
|
class TBSIE(TurnerBaseIE):
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/13658
|
||||||
|
_WORKING = False
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
|
'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
|
||||||
@ -17,7 +20,8 @@ class TBSIE(TurnerBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Theatrical Trailer',
|
'title': 'Theatrical Trailer',
|
||||||
'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
|
'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
|
||||||
}
|
},
|
||||||
|
'skip': 'TBS videos are deleted after a while',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
|
'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
|
||||||
'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
|
'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
|
||||||
@ -26,7 +30,8 @@ class TBSIE(TurnerBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'You Better Run',
|
'title': 'You Better Run',
|
||||||
'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
|
'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
|
||||||
}
|
},
|
||||||
|
'skip': 'TBS videos are deleted after a while',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
@ -14,7 +13,6 @@ from ..utils import (
|
|||||||
js_to_json,
|
js_to_json,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
urljoin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -222,17 +220,42 @@ class YoukuShowIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
|
_VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
|
||||||
IE_NAME = 'youku:show'
|
IE_NAME = 'youku:show'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
|
'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'zc7c670be07ff11e48b3f',
|
'id': 'zc7c670be07ff11e48b3f',
|
||||||
'title': '花千骨 未删减版',
|
'title': '花千骨 DVD版',
|
||||||
'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
|
'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
|
||||||
},
|
},
|
||||||
'playlist_count': 50,
|
'playlist_count': 50,
|
||||||
}
|
}, {
|
||||||
|
# Episode number not starting from 1
|
||||||
|
'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'zefbfbd70efbfbd780bef',
|
||||||
|
'title': '超级飞侠3',
|
||||||
|
'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
|
||||||
|
},
|
||||||
|
'playlist_count': 24,
|
||||||
|
}, {
|
||||||
|
# Ongoing playlist. The initial page is the last one
|
||||||
|
'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_PAGE_SIZE = 40
|
def _extract_entries(self, playlist_data_url, show_id, note, query):
|
||||||
|
query['callback'] = 'cb'
|
||||||
|
playlist_data = self._download_json(
|
||||||
|
playlist_data_url, show_id, query=query, note=note,
|
||||||
|
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
|
||||||
|
drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
|
||||||
|
get_element_by_class('p-drama-half-row', playlist_data))
|
||||||
|
if drama_list is None:
|
||||||
|
raise ExtractorError('No episodes found')
|
||||||
|
video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
|
||||||
|
return playlist_data, [
|
||||||
|
self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
|
||||||
|
for video_url in video_urls]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
@ -242,30 +265,29 @@ class YoukuShowIE(InfoExtractor):
|
|||||||
page_config = self._parse_json(self._search_regex(
|
page_config = self._parse_json(self._search_regex(
|
||||||
r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
|
r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
|
||||||
show_id, transform_source=js_to_json)
|
show_id, transform_source=js_to_json)
|
||||||
for idx in itertools.count(0):
|
first_page, initial_entries = self._extract_entries(
|
||||||
if idx == 0:
|
'http://list.youku.com/show/module', show_id,
|
||||||
playlist_data_url = 'http://list.youku.com/show/module'
|
note='Downloading initial playlist data page',
|
||||||
query = {'id': page_config['showid'], 'tab': 'point'}
|
query={
|
||||||
else:
|
|
||||||
playlist_data_url = 'http://list.youku.com/show/point'
|
|
||||||
query = {
|
|
||||||
'id': page_config['showid'],
|
'id': page_config['showid'],
|
||||||
'stage': 'reload_%d' % (self._PAGE_SIZE * idx + 1),
|
'tab': 'showInfo',
|
||||||
}
|
})
|
||||||
query['callback'] = 'cb'
|
first_page_reload_id = self._html_search_regex(
|
||||||
playlist_data = self._download_json(
|
r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
|
||||||
playlist_data_url, show_id, query=query,
|
# The first reload_id has the same items as first_page
|
||||||
|
reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
|
||||||
|
for idx, reload_id in enumerate(reload_ids):
|
||||||
|
if reload_id == first_page_reload_id:
|
||||||
|
entries.extend(initial_entries)
|
||||||
|
continue
|
||||||
|
_, new_entries = self._extract_entries(
|
||||||
|
'http://list.youku.com/show/episode', show_id,
|
||||||
note='Downloading playlist data page %d' % (idx + 1),
|
note='Downloading playlist data page %d' % (idx + 1),
|
||||||
transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
|
query={
|
||||||
video_urls = re.findall(
|
'id': page_config['showid'],
|
||||||
r'<div[^>]+class="p-thumb"[^<]+<a[^>]+href="([^"]+)"',
|
'stage': reload_id,
|
||||||
playlist_data)
|
})
|
||||||
new_entries = [
|
|
||||||
self.url_result(urljoin(url, video_url), YoukuIE.ie_key())
|
|
||||||
for video_url in video_urls]
|
|
||||||
entries.extend(new_entries)
|
entries.extend(new_entries)
|
||||||
if len(new_entries) < self._PAGE_SIZE:
|
|
||||||
break
|
|
||||||
|
|
||||||
desc = self._html_search_meta('description', webpage, fatal=False)
|
desc = self._html_search_meta('description', webpage, fatal=False)
|
||||||
playlist_title = desc.split(',')[0] if desc else None
|
playlist_title = desc.split(',')[0] if desc else None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user