Merge branch 'master' into GoogleDrive-issue-13619

This commit is contained in:
Parmjit Virk 2017-08-18 15:52:05 -05:00
commit 4687757aa1
15 changed files with 262 additions and 121 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.13*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.13** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.08.13 [debug] youtube-dl version 2017.08.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -223,3 +223,4 @@ Jan Kundrát
Giuseppe Fabiano Giuseppe Fabiano
Örn Guðjónsson Örn Guðjónsson
Parmjit Virk Parmjit Virk
Genki Sky

View File

@ -1,3 +1,21 @@
version 2017.08.18
Core
* [YoutubeDL] Sanitize byte string format URLs (#13951)
+ [extractor/common] Add support for float durations in _parse_mpd_formats
(#13919)
Extractors
* [arte] Detect unavailable videos (#13945)
* [generic] Convert redirect URLs to unicode strings (#13951)
* [udemy] Fix paid course detection (#13943)
* [pluralsight] Use RPC API for course extraction (#13937)
+ [clippit] Add support for clippituser.tv
+ [qqmusic] Support new URL schemes (#13805)
* [periscope] Renew HLS extraction (#13917)
* [mixcloud] Extract decrypt key
version 2017.08.13 version 2017.08.13
Core Core

View File

@ -156,6 +156,7 @@
- **Cinchcast** - **Cinchcast**
- **CJSW** - **CJSW**
- **cliphunter** - **cliphunter**
- **Clippit**
- **ClipRs** - **ClipRs**
- **Clipsyndicate** - **Clipsyndicate**
- **CloserToTruth** - **CloserToTruth**

View File

@ -1483,12 +1483,14 @@ class YoutubeDL(object):
def is_wellformed(f): def is_wellformed(f):
url = f.get('url') url = f.get('url')
valid_url = url and isinstance(url, compat_str) if not url:
if not valid_url:
self.report_warning( self.report_warning(
'"url" field is missing or empty - skipping format, ' '"url" field is missing or empty - skipping format, '
'there is an error in extractor') 'there is an error in extractor')
return valid_url return False
if isinstance(url, bytes):
sanitize_string_field(f, 'url')
return True
# Filter out malformed formats for better extraction robustness # Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats)) formats = list(filter(is_wellformed, formats))

View File

@ -9,12 +9,13 @@ from ..compat import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError,
find_xpath_attr, find_xpath_attr,
unified_strdate,
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
NO_DEFAULT, NO_DEFAULT,
qualities, qualities,
unified_strdate,
) )
# There are different sources of video in arte.tv, the extraction process # There are different sources of video in arte.tv, the extraction process
@ -79,6 +80,13 @@ class ArteTVBaseIE(InfoExtractor):
info = self._download_json(json_url, video_id) info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer'] player_info = info['videoJsonPlayer']
vsr = player_info['VSR']
if not vsr and not player_info.get('VRU'):
raise ExtractorError(
'Video %s is not available' % player_info.get('VID') or video_id,
expected=True)
upload_date_str = player_info.get('shootingDate') upload_date_str = player_info.get('shootingDate')
if not upload_date_str: if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
@ -107,7 +115,7 @@ class ArteTVBaseIE(InfoExtractor):
langcode = LANGS.get(lang, lang) langcode = LANGS.get(lang, lang)
formats = [] formats = []
for format_id, format_dict in player_info['VSR'].items(): for format_id, format_dict in vsr.items():
f = dict(format_dict) f = dict(format_dict)
versionCode = f.get('versionCode') versionCode = f.get('versionCode')
l = re.escape(langcode) l = re.escape(langcode)

View File

@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
qualities,
)
import re
class ClippitIE(InfoExtractor):
    """Extractor for single clips hosted on clippituser.tv."""

    _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
    _TEST = {
        'url': 'https://www.clippituser.tv/c/evmgm',
        'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
        'info_dict': {
            'id': 'evmgm',
            'ext': 'mp4',
            'title': 'Bye bye Brutus. #BattleBots  - Clippit',
            'uploader': 'lizllove',
            'uploader_url': 'https://www.clippituser.tv/p/lizllove',
            'timestamp': 1472183818,
            'upload_date': '20160826',
            'description': 'BattleBots | ABC',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page <title> doubles as the clip title.
        title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')

        FORMATS = ('sd', 'hd')
        quality = qualities(FORMATS)

        formats = []
        for format_id in FORMATS:
            # Each quality variant is exposed via a data-<quality>-file attribute.
            media_url = self._html_search_regex(
                r'data-%s-file="(.+?)"' % format_id,
                webpage, 'url', fatal=False)
            if not media_url:
                continue
            # The height is encoded in the file name, e.g. .../720.mp4.
            height_match = re.search(r'/(?P<height>\d+)\.mp4', media_url)
            formats.append({
                'url': media_url,
                'format_id': format_id,
                'quality': quality(format_id),
                'height': int(height_match.group('height')) if height_match else None,
            })

        uploader = self._html_search_regex(
            r'class="username".*>\s+(.+?)\n',
            webpage, 'uploader', fatal=False)
        if uploader:
            uploader_url = 'https://www.clippituser.tv/p/' + uploader
        else:
            uploader_url = None

        timestamp = self._html_search_regex(
            r'datetime="(.+?)"',
            webpage, 'date', fatal=False)
        thumbnail = self._html_search_regex(
            r'data-image="(.+?)"',
            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'uploader': uploader,
            'uploader_url': uploader_url,
            'timestamp': parse_iso8601(timestamp),
            'description': self._og_search_description(webpage),
            'thumbnail': thumbnail,
        }

View File

@ -187,6 +187,7 @@ from .chirbit import (
from .cinchcast import CinchcastIE from .cinchcast import CinchcastIE
from .cjsw import CJSWIE from .cjsw import CJSWIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE from .cliprs import ClipRsIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE from .closertotruth import CloserToTruthIE

View File

@ -2015,7 +2015,7 @@ class GenericIE(InfoExtractor):
if head_response is not False: if head_response is not False:
# Check for redirect # Check for redirect
new_url = head_response.geturl() new_url = compat_str(head_response.geturl())
if url != new_url: if url != new_url:
self.report_following_redirect(new_url) self.report_following_redirect(new_url)
if force_videoid: if force_videoid:
@ -2116,7 +2116,7 @@ class GenericIE(InfoExtractor):
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats( info_dict['formats'] = self._parse_mpd_formats(
doc, video_id, doc, video_id,
mpd_base_url=full_response.geturl().rpartition('/')[0], mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
mpd_url=url) mpd_url=url)
self._sort_formats(info_dict['formats']) self._sort_formats(info_dict['formats'])
return info_dict return info_dict

View File

@ -91,12 +91,14 @@ class MixcloudIE(InfoExtractor):
if js_url: if js_url:
js = self._download_webpage(js_url, track_id, fatal=False) js = self._download_webpage(js_url, track_id, fatal=False)
if js: if js:
key = self._search_regex( KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1', for key_name in ('value', 'key_value'):
js, 'key', default=None, group='key') key = self._search_regex(
if key and isinstance(key, compat_str): KEY_RE_TEMPLATE % key_name, js, 'key',
self._keys.insert(0, key) default=None, group='key')
self._current_key = key if key and isinstance(key, compat_str):
self._keys.insert(0, key)
self._current_key = key
message = self._html_search_regex( message = self._html_search_regex(
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',

View File

@ -80,18 +80,24 @@ class PeriscopeIE(PeriscopeBaseIE):
stream = self._call_api( stream = self._call_api(
'getAccessPublic', {'broadcast_id': token}, token) 'getAccessPublic', {'broadcast_id': token}, token)
video_urls = set()
formats = [] formats = []
for format_id in ('replay', 'rtmp', 'hls', 'https_hls'): for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
video_url = stream.get(format_id + '_url') video_url = stream.get(format_id + '_url')
if not video_url: if not video_url or video_url in video_urls:
continue continue
f = { video_urls.add(video_url)
if format_id != 'rtmp':
formats.extend(self._extract_m3u8_formats(
video_url, token, 'mp4',
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
m3u8_id=format_id, fatal=False))
continue
formats.append({
'url': video_url, 'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4', 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
} })
if format_id != 'rtmp':
f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8'
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -18,6 +18,7 @@ from ..utils import (
parse_duration, parse_duration,
qualities, qualities,
srt_subtitles_timecode, srt_subtitles_timecode,
try_get,
update_url_query, update_url_query,
urlencode_postdata, urlencode_postdata,
) )
@ -26,6 +27,39 @@ from ..utils import (
class PluralsightBaseIE(InfoExtractor): class PluralsightBaseIE(InfoExtractor):
_API_BASE = 'https://app.pluralsight.com' _API_BASE = 'https://app.pluralsight.com'
def _download_course(self, course_id, url, display_id):
try:
return self._download_course_rpc(course_id, url, display_id)
except ExtractorError:
# Old API fallback
return self._download_json(
'https://app.pluralsight.com/player/user/api/v1/player/payload',
display_id, data=urlencode_postdata({'courseId': course_id}),
headers={'Referer': url})
def _download_course_rpc(self, course_id, url, display_id):
response = self._download_json(
'%s/player/functions/rpc' % self._API_BASE, display_id,
'Downloading course JSON',
data=json.dumps({
'fn': 'bootstrapPlayer',
'payload': {
'courseId': course_id,
},
}).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
})
course = try_get(response, lambda x: x['payload']['course'], dict)
if course:
return course
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']['message']),
expected=True)
class PluralsightIE(PluralsightBaseIE): class PluralsightIE(PluralsightBaseIE):
IE_NAME = 'pluralsight' IE_NAME = 'pluralsight'
@ -162,10 +196,7 @@ class PluralsightIE(PluralsightBaseIE):
display_id = '%s-%s' % (name, clip_id) display_id = '%s-%s' % (name, clip_id)
course = self._download_json( course = self._download_course(course_name, url, display_id)
'https://app.pluralsight.com/player/user/api/v1/player/payload',
display_id, data=urlencode_postdata({'courseId': course_name}),
headers={'Referer': url})
collection = course['modules'] collection = course['modules']
@ -331,18 +362,7 @@ class PluralsightCourseIE(PluralsightBaseIE):
# TODO: PSM cookie # TODO: PSM cookie
course = self._download_json( course = self._download_course(course_id, url, course_id)
'%s/player/functions/rpc' % self._API_BASE, course_id,
'Downloading course JSON',
data=json.dumps({
'fn': 'bootstrapPlayer',
'payload': {
'courseId': course_id,
}
}).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8'
})['payload']['course']
title = course['title'] title = course['title']
course_name = course['name'] course_name = course['name']

View File

@ -2,38 +2,37 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
import time
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
sanitized_Request,
strip_jsonp,
unescapeHTML,
clean_html, clean_html,
ExtractorError, ExtractorError,
strip_jsonp,
unescapeHTML,
) )
class QQMusicIE(InfoExtractor): class QQMusicIE(InfoExtractor):
IE_NAME = 'qqmusic' IE_NAME = 'qqmusic'
IE_DESC = 'QQ音乐' IE_DESC = 'QQ音乐'
_VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
'md5': '9ce1c1c8445f561506d2e3cfb0255705', 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
'info_dict': { 'info_dict': {
'id': '004295Et37taLD', 'id': '004295Et37taLD',
'ext': 'mp3', 'ext': 'mp3',
'title': '可惜没如果', 'title': '可惜没如果',
'release_date': '20141227', 'release_date': '20141227',
'creator': '林俊杰', 'creator': '林俊杰',
'description': 'md5:d327722d0361576fde558f1ac68a7065', 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
} }
}, { }, {
'note': 'There is no mp3-320 version of this song.', 'note': 'There is no mp3-320 version of this song.',
'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV', 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
'md5': 'fa3926f0c585cda0af8fa4f796482e3e', 'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
'info_dict': { 'info_dict': {
'id': '004MsGEo3DdNxV', 'id': '004MsGEo3DdNxV',
@ -46,14 +45,14 @@ class QQMusicIE(InfoExtractor):
} }
}, { }, {
'note': 'lyrics not in .lrc format', 'note': 'lyrics not in .lrc format',
'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6', 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
'info_dict': { 'info_dict': {
'id': '001JyApY11tIp6', 'id': '001JyApY11tIp6',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Shadows Over Transylvania', 'title': 'Shadows Over Transylvania',
'release_date': '19970225', 'release_date': '19970225',
'creator': 'Dark Funeral', 'creator': 'Dark Funeral',
'description': 'md5:ed14d5bd7ecec19609108052c25b2c11', 'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
}, },
'params': { 'params': {
@ -105,7 +104,7 @@ class QQMusicIE(InfoExtractor):
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
detail_info_page, 'album mid', default=None) detail_info_page, 'album mid', default=None)
if albummid: if albummid:
thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \ thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
% (albummid[-2:-1], albummid[-1], albummid) % (albummid[-2:-1], albummid[-1], albummid)
guid = self.m_r_get_ruin() guid = self.m_r_get_ruin()
@ -156,15 +155,39 @@ class QQPlaylistBaseIE(InfoExtractor):
def qq_static_url(category, mid): def qq_static_url(category, mid):
return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
@classmethod def get_singer_all_songs(self, singmid, num):
def get_entries_from_page(cls, page): return self._download_webpage(
r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
query={
'format': 'json',
'inCharset': 'utf8',
'outCharset': 'utf-8',
'platform': 'yqq',
'needNewCode': 0,
'singermid': singmid,
'order': 'listen',
'begin': 0,
'num': num,
'songstatus': 1,
})
def get_entries_from_page(self, singmid):
entries = [] entries = []
for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page): default_num = 1
song_mid = unescapeHTML(item).split('|')[-5] json_text = self.get_singer_all_songs(singmid, default_num)
entries.append(cls.url_result( json_obj_all_songs = self._parse_json(json_text, singmid)
'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
song_mid)) if json_obj_all_songs['code'] == 0:
total = json_obj_all_songs['data']['total']
json_text = self.get_singer_all_songs(singmid, total)
json_obj_all_songs = self._parse_json(json_text, singmid)
for item in json_obj_all_songs['data']['list']:
if item['musicData'].get('songmid') is not None:
songmid = item['musicData']['songmid']
entries.append(self.url_result(
r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
return entries return entries
@ -172,42 +195,32 @@ class QQPlaylistBaseIE(InfoExtractor):
class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicSingerIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:singer' IE_NAME = 'qqmusic:singer'
IE_DESC = 'QQ音乐 - 歌手' IE_DESC = 'QQ音乐 - 歌手'
_VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
_TEST = { _TEST = {
'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
'info_dict': { 'info_dict': {
'id': '001BLpXF2DyJe2', 'id': '001BLpXF2DyJe2',
'title': '林俊杰', 'title': '林俊杰',
'description': 'md5:870ec08f7d8547c29c93010899103751', 'description': 'md5:870ec08f7d8547c29c93010899103751',
}, },
'playlist_count': 12, 'playlist_mincount': 12,
} }
def _real_extract(self, url): def _real_extract(self, url):
mid = self._match_id(url) mid = self._match_id(url)
singer_page = self._download_webpage( entries = self.get_entries_from_page(mid)
self.qq_static_url('singer', mid), mid, 'Download singer page') singer_page = self._download_webpage(url, mid, 'Download singer page')
entries = self.get_entries_from_page(singer_page)
singer_name = self._html_search_regex( singer_name = self._html_search_regex(
r"singername\s*:\s*'([^']+)'", singer_page, 'singer name', r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
default=None)
singer_id = self._html_search_regex(
r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
default=None)
singer_desc = None singer_desc = None
if singer_id: if mid:
req = sanitized_Request(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
req.add_header(
'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
singer_desc_page = self._download_xml( singer_desc_page = self._download_xml(
req, mid, 'Donwload singer description XML') 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
'Donwload singer description XML',
query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
singer_desc = singer_desc_page.find('./data/info/desc').text singer_desc = singer_desc_page.find('./data/info/desc').text
@ -217,10 +230,10 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:album' IE_NAME = 'qqmusic:album'
IE_DESC = 'QQ音乐 - 专辑' IE_DESC = 'QQ音乐 - 专辑'
_VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
'info_dict': { 'info_dict': {
'id': '000gXCTb2AhRR1', 'id': '000gXCTb2AhRR1',
'title': '我们都是这样长大的', 'title': '我们都是这样长大的',
@ -228,7 +241,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
}, },
'playlist_count': 4, 'playlist_count': 4,
}, { }, {
'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3', 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
'info_dict': { 'info_dict': {
'id': '002Y5a3b3AlCu3', 'id': '002Y5a3b3AlCu3',
'title': '그리고...', 'title': '그리고...',
@ -246,7 +259,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
entries = [ entries = [
self.url_result( self.url_result(
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
) for song in album['list'] ) for song in album['list']
] ]
album_name = album.get('name') album_name = album.get('name')
@ -260,31 +273,30 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
class QQMusicToplistIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:toplist' IE_NAME = 'qqmusic:toplist'
IE_DESC = 'QQ音乐 - 排行榜' IE_DESC = 'QQ音乐 - 排行榜'
_VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://y.qq.com/#type=toplist&p=global_123', 'url': 'https://y.qq.com/n/yqq/toplist/123.html',
'info_dict': { 'info_dict': {
'id': 'global_123', 'id': '123',
'title': '美国iTunes榜', 'title': '美国iTunes榜',
}, 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
'playlist_count': 10,
}, {
'url': 'http://y.qq.com/#type=toplist&p=top_3',
'info_dict': {
'id': 'top_3',
'title': '巅峰榜·欧美',
'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成集结当下最流行的欧美新歌:更新时间每周四22点|统'
'计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量100首|统计算法:根据'
'歌曲在一周内的有效播放次数由高到低取前100名同一歌手最多允许5首歌曲同时上榜|有效播放次数:'
'登录用户完整播放一首歌曲记为一次有效播放同一用户收听同一首歌曲每天记录为1次有效播放'
}, },
'playlist_count': 100, 'playlist_count': 100,
}, { }, {
'url': 'http://y.qq.com/#type=toplist&p=global_106', 'url': 'https://y.qq.com/n/yqq/toplist/3.html',
'info_dict': { 'info_dict': {
'id': 'global_106', 'id': '3',
'title': '巅峰榜·欧美',
'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
},
'playlist_count': 100,
}, {
'url': 'https://y.qq.com/n/yqq/toplist/106.html',
'info_dict': {
'id': '106',
'title': '韩国Mnet榜', 'title': '韩国Mnet榜',
'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
}, },
'playlist_count': 50, 'playlist_count': 50,
}] }]
@ -292,18 +304,15 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
list_type, num_id = list_id.split("_")
toplist_json = self._download_json( toplist_json = self._download_json(
'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
% (list_type, num_id), note='Download toplist page',
list_id, 'Download toplist page') query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
entries = [ entries = [self.url_result(
self.url_result( 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid'] song['data']['songmid'])
) for song in toplist_json['songlist'] for song in toplist_json['songlist']]
]
topinfo = toplist_json.get('topinfo', {}) topinfo = toplist_json.get('topinfo', {})
list_name = topinfo.get('ListName') list_name = topinfo.get('ListName')
@ -314,10 +323,10 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
class QQMusicPlaylistIE(QQPlaylistBaseIE): class QQMusicPlaylistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:playlist' IE_NAME = 'qqmusic:playlist'
IE_DESC = 'QQ音乐 - 歌单' IE_DESC = 'QQ音乐 - 歌单'
_VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://y.qq.com/#type=taoge&id=3462654915', 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
'info_dict': { 'info_dict': {
'id': '3462654915', 'id': '3462654915',
'title': '韩国5月新歌精选下旬', 'title': '韩国5月新歌精选下旬',
@ -326,7 +335,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
'playlist_count': 40, 'playlist_count': 40,
'skip': 'playlist gone', 'skip': 'playlist gone',
}, { }, {
'url': 'http://y.qq.com/#type=taoge&id=1374105607', 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
'info_dict': { 'info_dict': {
'id': '1374105607', 'id': '1374105607',
'title': '易入人心的华语民谣', 'title': '易入人心的华语民谣',
@ -339,8 +348,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
list_id = self._match_id(url) list_id = self._match_id(url)
list_json = self._download_json( list_json = self._download_json(
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
% list_id, list_id, 'Download list page', list_id, 'Download list page',
query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
transform_source=strip_jsonp) transform_source=strip_jsonp)
if not len(list_json.get('cdlist', [])): if not len(list_json.get('cdlist', [])):
if list_json.get('code'): if list_json.get('code'):
@ -350,11 +360,9 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
raise ExtractorError('Unable to get playlist info') raise ExtractorError('Unable to get playlist info')
cdlist = list_json['cdlist'][0] cdlist = list_json['cdlist'][0]
entries = [ entries = [self.url_result(
self.url_result( 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] for song in cdlist['songlist']]
) for song in cdlist['songlist']
]
list_name = cdlist.get('dissname') list_name = cdlist.get('dissname')
list_description = clean_html(unescapeHTML(cdlist.get('desc'))) list_description = clean_html(unescapeHTML(cdlist.get('desc')))

View File

@ -74,7 +74,7 @@ class UdemyIE(InfoExtractor):
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
checkout_url = unescapeHTML(self._search_regex( checkout_url = unescapeHTML(self._search_regex(
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1', r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
webpage, 'checkout url', group='url', default=None)) webpage, 'checkout url', group='url', default=None))
if checkout_url: if checkout_url:
raise ExtractorError( raise ExtractorError(

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2017.08.13' __version__ = '2017.08.18'