176 lines
6.3 KiB
Python
Raw Normal View History

2017-05-15 00:04:39 -03:00
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    get_element_by_attribute,
    get_element_by_id,
)


class PalcoMP3IE(InfoExtractor):
    """Extractor for a single song page on palcomp3.com.

    The page is expected to carry a JSON-LD document inside the element
    whose id is ``player``.  Every entry of that document's ``track`` list is
    turned into an info dict, and the entry whose ``display_id`` equals the
    song slug from the URL is returned.
    """
    IE_NAME = 'PalcoMP3:song'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)/?$'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
        'info_dict': {
            'id': '3162927',
            'ext': 'mp3',
            'display_id': 'nossas-composicoes-cuida-bem-dela',
            'title': 'Nossas Composições - CUIDA BEM DELA',
            'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',
        }
    }, {
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/niveis-da-bebida',
        'md5': '4c4d1e45b5ae49396cfff017eb41cdd9',
        'info_dict': {
            'id': '2303899',
            'ext': 'mp3',
            'display_id': 'niveis-da-bebida',
            'title': 'NIVEIS DA BEBIDA',
            'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',
        }
    }]

    def _json_ld(self, json_ld, display_id, fatal=True, expected_type="MusicGroup"):
        """Return the raw parsed JSON-LD instead of a normalised info dict.

        Overrides the inherited ``_json_ld`` so that ``_search_json_ld`` can
        be reused only for locating the JSON; ``expected_type`` is accepted
        for signature compatibility and otherwise ignored.
        """
        return self._parse_json(json_ld, display_id, fatal=fatal)

    def _extract_common(self, url):
        """Download ``url`` and convert every JSON-LD track on the page.

        Returns a ``(tracks, ld, webpage)`` tuple: the list of per-track info
        dicts, the parsed JSON-LD document and the raw page.
        """
        # Match against _VALID_URL directly rather than through the cached
        # ``_VALID_URL_RE`` attribute, which is not defined in this file.
        artist_id = re.match(self._VALID_URL, url).group('artist')
        webpage = self._download_webpage(url, artist_id)
        # Kept for backward compatibility: the page has always been stored
        # on the instance, although nothing in this file reads it back.
        self.webpage = webpage

        ld = self._get_ld_info(webpage, artist_id)
        tracks = [self._ld_track_process(track, ld) for track in ld['track']]
        return tracks, ld, webpage

    def _real_extract(self, url, with_webpage=False):
        """Return the info dict of the track addressed by ``url``.

        When ``with_webpage`` is true a ``(track, webpage)`` tuple is returned
        instead.  Raises ``ExtractorError`` if no track on the page has a
        ``display_id`` matching the URL, rather than implicitly returning
        ``None`` (which callers unpacking the tuple cannot handle).
        """
        display_id = self._match_id(url)
        tracks, _, webpage = self._extract_common(url)
        for track in tracks:
            if track['display_id'] == display_id:
                return (track, webpage) if with_webpage else track
        raise ExtractorError(
            'Unable to find track "%s" in the page metadata' % display_id)

    def _get_ld_info(self, webpage, display_id):
        """Locate the player element in ``webpage`` and parse its JSON-LD."""
        player = get_element_by_id('player', webpage) or \
            get_element_by_attribute('id', 'player', webpage, escape_value=False)
        if player is None:
            # Fail with a readable message instead of handing None to the
            # regex search inside _search_json_ld.
            raise ExtractorError('Unable to find the player element')
        return self._search_json_ld(player, display_id, expected_type="MusicGroup")

    @staticmethod
    def _duration_to_seconds(duration):
        """Convert a ``PT[nH][nM][nS]`` duration string to whole seconds.

        Returns ``None`` when ``duration`` is empty or holds none of the
        hour/minute/second components.
        """
        mobj = re.search(
            r'(?i)PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', duration or '')
        if not mobj or not any(mobj.groups()):
            return None
        hours, minutes, seconds = [int(part or 0) for part in mobj.groups()]
        return hours * 3600 + minutes * 60 + seconds

    def _ld_track_process(self, track, ld=None):
        """Build an info dict from one JSON-LD ``track`` entry.

        ``ld`` is the enclosing JSON-LD document; only its optional ``genre``
        key is read.  It defaults to ``None`` (no genre) instead of a shared
        mutable dict.
        """
        track_path = track['url']
        return {
            'id': track['@id'],
            'title': track['name'],
            'track': track['name'],
            # 'audio' is normally protocol-relative ("//host/..."); an
            # already absolute URL is passed through unchanged.
            'url': self._proto_relative_url(track['audio'], 'https:'),
            'webpage_url': 'https://www.palcomp3.com' + track_path,
            'artist': track['byArtist']['name'],
            'thumbnail': track['byArtist']['image'],
            # Last path component, with or without a trailing slash.
            'display_id': track_path.rstrip('/').split('/')[-1],
            'duration': self._duration_to_seconds(track.get('duration')),
            'genre': (ld or {}).get('genre'),
        }


class PalcoMP3ArtistIE(PalcoMP3IE):
    """Extractor for an artist page, returned as a playlist of its tracks."""
    IE_NAME = 'PalcoMP3:artist'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/?$'
    _TESTS = [
        {
            'url': 'https://www.palcomp3.com/banda5cha/',
            'info_dict': {
                'id': 'banda5cha',
                'title': '5Chá',
            },
            'playlist_count': 2,
        },
        {
            'url': 'https://www.palcomp3.com/kleijohnata/',
            'info_dict': {
                'id': 'kleijohnata',
                'title': 'KLEIJOHNATA ',
            },
            'playlist_count': 4,
        },
        # Active famous artist; very likely to change playlist_count.
        # {
        #     'url': 'https://www.palcomp3.com/maiaraemaraisaoficial',
        #     'info_dict': {
        #         'id': 'maiaraemaraisaoficial',
        #         'title': 'Maiara e Maraisa Oficial',
        #     },
        #     'playlist_count': 8,
        # },
    ]

    def _real_extract(self, url):
        """Return a playlist holding every track listed on the artist page.

        The playlist id is the artist slug from the URL, the title is the
        JSON-LD ``name`` and the description is the JSON-LD ``description``,
        which is what ``_TESTS`` above expects.  The positional arguments of
        ``playlist_result`` are (entries, id, title, description), so passing
        the name first would have used it as the id.
        """
        artist_id = re.match(self._VALID_URL, url).group('artist')
        tracks, ld, _ = self._extract_common(url)
        return self.playlist_result(
            tracks, artist_id, ld.get('name'), ld.get('description'))


class PalcoMP3VideoIE(PalcoMP3IE):
    """Extractor for the video clip (``#clipe``) attached to a song page.

    The clip itself is delegated to the Youtube extractor; this class only
    resolves the video id from the song page markup.
    """
    IE_NAME = 'PalcoMP3:video'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)/#clipe$'
    _TESTS = [
        {
            'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
            'add_ie': ['Youtube'],
            'info_dict': {
                'id': '_pD1nR2qqPg',
                'ext': 'mp4',
                'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
                'description': 'md5:739d585d094212b999e507377daa21de',
                'upload_date': '20161107',
                'uploader_id': 'maiaramaraisaoficial',
                'uploader': 'Maiara e Maraisa',
            }
        },
        {
            'url': 'https://www.palcomp3.com/mckevinho/dog-vagabundo-mc-phe-cachorrera-part-mc-kevinho/#clipe',
            'add_ie': ['Youtube'],
            'info_dict': {
                'id': 'iKVAfp6-o-Q',
                'ext': 'mp4',
                'title': 'MC Phe Cachorrera e MC Kevinho - Dog Vagabundo (Video Clipe) Jorgin Deejhay / HDUC ep.2',
                'description': 'md5:728024b6905a9a321c8c16e1e1985e56',
                'upload_date': '20170208',
                'uploader': 'GR6 EXPLODE',
                'uploader_id': 'gr6explode',
            }
        },
    ]

    def _real_extract(self, url):
        """Resolve the video attached to the song and hand it to Youtube.

        The parent extractor supplies the track info and the page; the
        ``<li>`` entry whose ``data-id`` equals the track id carries the
        video id in ``data-id-video``.  A missing entry makes
        ``_search_regex`` raise instead of this method returning ``None``.
        """
        track, webpage = super(PalcoMP3VideoIE, self)._real_extract(url, with_webpage=True)

        # The (?x) flag must be the very first thing in the pattern: newer
        # Python versions reject global inline flags placed anywhere else.
        # The track id is escaped so it is always matched literally.
        video_re = r'''(?x)
            <li (.*?) data-id="%s" (.*?)
            data-id-video="(?P<video_id>[^"]+?)"
            (.*?) >
            <a (.*?) href="([^"]*?)">
            <span>([^<]*?)</span></a></li>
            ''' % re.escape('%s' % track['id'])

        video_id = self._search_regex(
            video_re, webpage, 'video id', group='video_id')

        return {
            '_type': 'url_transparent',
            'ie_key': 'Youtube',
            'id': video_id,
            'url': video_id,
        }