PalcoMP3: Getting song fine
This commit is contained in:
parent
4cf35c4bdb
commit
2c7edaf8f6
@ -1,6 +1,8 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
from ..utils import get_element_by_id, get_element_by_attribute
|
from ..utils import get_element_by_id, get_element_by_attribute
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -10,60 +12,72 @@ from pprint import pprint as pp
|
|||||||
|
|
||||||
class PalcoMP3IE(InfoExtractor):
    """Extractor for single songs hosted on palcomp3.com.

    The song page embeds a JSON-LD document inside the element with id
    ``player``; that document carries a ``track`` list.  The extractor reads
    the list and returns the entry whose URL slug equals the slug of the
    requested page.
    """

    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
        'info_dict': {
            'id': '3162927',
            'ext': 'mp3',
            'display_id': 'nossas-composicoes-cuida-bem-dela',
            'title': 'Nossas Composições - CUIDA BEM DELA',
            'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',
        },
    }, {
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/niveis-da-bebida/',
        'md5': '4c4d1e45b5ae49396cfff017eb41cdd9',
        'info_dict': {
            'id': '2303899',
            'ext': 'mp3',
            'display_id': 'niveis-da-bebida',
            'title': 'NIVEIS DA BEBIDA',
            'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',
        },
    }]

    def _json_ld(self, json_ld, display_id, fatal=True, expected_type="MusicGroup"):
        """Return the raw JSON-LD document instead of a digested info dict.

        Overrides `common.py:_json_ld`: only `_search_json_ld` is needed to
        locate the JSON, and the stock `_json_ld` post-processing does not
        fit this site, so the text is simply parsed and handed back.

        `expected_type` is accepted for signature compatibility and is not
        used here.
        """
        return self._parse_json(json_ld, display_id, fatal=fatal)

    def _real_extract(self, url):
        """Download the song page and return the info dict of the requested song.

        Raises ExtractorError when none of the listed tracks carries the slug
        taken from `url`; previously the method fell off the end and
        returned None in that case.
        """
        # Imported locally so the file-level import block stays untouched.
        from ..utils import ExtractorError

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        ld = self._get_ld_info(webpage, display_id)

        # Tracks are processed one at a time so that a problem in an
        # unrelated entry cannot prevent the requested one from being found.
        for track in ld['track']:
            info = self._ld_track_process(track, ld)
            if info['display_id'] == display_id:
                return info

        raise ExtractorError(
            'Unable to find track %s in the page metadata' % display_id)

    def _get_ld_info(self, webpage, display_id):
        """Return the parsed JSON-LD document found in the ``player`` element.

        Raises ExtractorError when the page has no element with that id,
        rather than passing a missing element on to `_search_json_ld`.
        """
        from ..utils import ExtractorError

        player = get_element_by_id('player', webpage)
        if player is None:
            raise ExtractorError('Unable to find the player element')
        return self._search_json_ld(player, display_id, expected_type="MusicGroup")

    @staticmethod
    def _iso_duration_seconds(value):
        """Convert a ``PT[nH][nM][nS]`` duration string to whole seconds.

        Returns None when `value` is empty or holds none of the H/M/S
        components.
        """
        if not value:
            return None
        match = re.search(
            r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', value, re.IGNORECASE)
        if match is None or not any(match.groups()):
            return None
        hours, minutes, seconds = (int(part or 0) for part in match.groups())
        return hours * 3600 + minutes * 60 + seconds

    def _ld_track_process(self, track, ld=None):
        """Build an info dict from one entry of the JSON-LD ``track`` list.

        `track` must provide '@id', 'name', 'audio', 'url' and a 'byArtist'
        mapping with 'name' and 'image'.  `ld` is the enclosing JSON-LD
        document and is only consulted for 'genre'; when it is omitted the
        genre is None.  'duration' is None when the track has no parsable
        duration.
        """
        # NOTE(review): 'audio' is presumably a scheme-relative URL
        # ("//host/path") and 'url' a site-relative path ending in "/",
        # as the concatenations and the [-2] index below imply -- confirm
        # against live data.
        artist = track['byArtist']
        return {
            'id': track['@id'],
            'title': track['name'],
            'track': track['name'],
            'url': 'https:' + track['audio'],
            'webpage_url': 'https://www.palcomp3.com' + track['url'],
            'artist': artist['name'],
            'thumbnail': artist['image'],
            'display_id': track['url'].split('/')[-2],
            'duration': self._iso_duration_seconds(track.get('duration')),
            'genre': (ld or {}).get('genre'),
        }
|
Loading…
x
Reference in New Issue
Block a user