l1ving_youtube-dl/youtube_dl/extractor/palcomp3.py

# coding: utf-8
from __future__ import unicode_literals

import re

from ..utils import (
    ExtractorError,
    get_element_by_attribute,
    get_element_by_id,
)
from .common import InfoExtractor


class PalcoMP3IE(InfoExtractor):
    """Extractor for a single song page on palcomp3.com.

    The page is downloaded, the JSON-LD document embedded in it is parsed,
    every entry of its ``track`` list is converted to an info dict, and the
    one whose URL slug equals the slug of the requested URL is returned.
    """

    IE_NAME = 'PalcoMP3:song'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)/?$'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
        'info_dict': {
            'id': '3162927',
            'ext': 'mp3',
            'display_id': 'nossas-composicoes-cuida-bem-dela',
            'title': 'Nossas Composições - CUIDA BEM DELA',
            'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',
        },
    }, {
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/niveis-da-bebida',
        'md5': '4c4d1e45b5ae49396cfff017eb41cdd9',
        'info_dict': {
            'id': '2303899',
            'ext': 'mp3',
            'display_id': 'niveis-da-bebida',
            'title': 'NIVEIS DA BEBIDA',
            'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',
        },
    }]

    # ISO 8601 style time-only duration ("PT3M2S", "PT45S", "PT1H2M3S").
    # Each component is optional so that durations without a minutes or
    # seconds part no longer break extraction.
    _DURATION_RE = r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?'

    def _json_ld(self, json_ld, display_id, fatal=True, expected_type="MusicGroup"):
        """Return the raw JSON-LD document parsed into Python objects.

        Overrides `common.py:_json_ld`: only `_search_json_ld` is needed to
        locate the JSON, and the original `_json_ld` post-processing does not
        fit this site. `expected_type` is accepted for signature
        compatibility and is not used here.
        """
        return self._parse_json(json_ld, display_id, fatal=fatal)

    def _extract_common(self, url):
        """Download `url` and return `(tracks, ld, webpage)`.

        `tracks` is a list of info dicts (one per entry of `ld['track']`),
        `ld` is the parsed JSON-LD document and `webpage` the page source.
        """
        # Match against _VALID_URL directly: _VALID_URL_RE is not defined
        # anywhere in this file, so relying on it means relying on some other
        # code having populated it for this exact class beforehand.
        artist_id = re.match(self._VALID_URL, url).group('artist')
        webpage = self._download_webpage(url, artist_id)
        # Kept from the original implementation; nothing in this file reads it.
        self.webpage = webpage

        ld = self._get_ld_info(webpage, artist_id)
        tracks = [self._ld_track_process(track, ld) for track in ld['track']]

        return tracks, ld, webpage

    def _real_extract(self, url, with_webpage=False):
        """Return the info dict of the track addressed by `url`.

        When `with_webpage` is true a `(track, webpage)` tuple is returned
        instead, so that subclasses can keep scraping the same page.

        Raises ExtractorError when no listed track has the requested slug
        (previously the method silently returned None, which made callers
        that unpack the result fail with a TypeError).
        """
        display_id = self._match_id(url)
        tracks, _, webpage = self._extract_common(url)
        for track in tracks:
            if track['display_id'] == display_id:
                return (track, webpage) if with_webpage else track
        raise ExtractorError(
            'Unable to find track "%s" in the page metadata' % display_id)

    def _get_ld_info(self, webpage, display_id):
        """Locate and parse the JSON-LD document of `webpage`."""
        # Prefer the content of the element with id="player"; if neither
        # lookup finds it, search the whole page rather than handing None to
        # `_search_json_ld`.
        player = (
            get_element_by_id('player', webpage)
            or get_element_by_attribute('id', 'player', webpage, escape_value=False)
            or webpage)
        return self._search_json_ld(player, display_id, expected_type="MusicGroup")

    @classmethod
    def _duration_seconds(cls, duration):
        """Convert a "PT..H..M..S" string to seconds; None if unparseable."""
        mobj = re.search(cls._DURATION_RE, duration or '', re.IGNORECASE)
        # A bare "PT" matches the pattern with every group empty; treat that
        # the same as no duration at all.
        if not mobj or not any(mobj.groups()):
            return None
        hours, minutes, seconds = [int(part or 0) for part in mobj.groups()]
        return hours * 3600 + minutes * 60 + seconds

    def _ld_track_process(self, track, ld=None):
        """Build an info dict from one entry of the JSON-LD `track` list.

        `ld` is the enclosing JSON-LD document; only its `genre` key is read.
        It may be omitted, in which case `genre` is None.
        """
        audio_url = track['audio']
        # The 'https:' prefix is only correct for protocol-relative URLs;
        # leave anything else untouched instead of producing 'https:https://'.
        if audio_url.startswith('//'):
            audio_url = 'https:' + audio_url

        artist = track.get('byArtist') or {}

        return {
            'id': track['@id'],
            'title': track['name'],
            'track': track['name'],
            'url': audio_url,
            'webpage_url': 'https://www.palcomp3.com' + track['url'],
            'artist': artist.get('name'),
            'thumbnail': artist.get('image'),
            # Last path component, with or without a trailing slash.
            'display_id': track['url'].rstrip('/').split('/')[-1],
            'duration': self._duration_seconds(track.get('duration')),
            'genre': (ld or {}).get('genre'),
        }


class PalcoMP3ArtistIE(PalcoMP3IE):
    """Playlist extractor for an artist page on palcomp3.com.

    Reuses `_extract_common` from the song extractor and wraps every track
    it yields into a single playlist result.
    """

    IE_NAME = 'PalcoMP3:artist'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/?$'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/banda5cha/',
        'info_dict': {
            'id': 'banda5cha',
            'title': '5Chá',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.palcomp3.com/kleijohnata/',
        'info_dict': {
            'id': 'kleijohnata',
            'title': 'KLEIJOHNATA ',
        },
        'playlist_count': 4,
    }]
    # Disabled test: active famous artist, playlist_count is very likely to
    # change over time.
    # {
    #     'url': 'https://www.palcomp3.com/maiaraemaraisaoficial',
    #     'info_dict': {
    #         'id': 'maiaraemaraisaoficial',
    #         'title': 'Maiara e Maraisa Oficial',
    #     },
    #     'playlist_count': 8,
    # },

    def _real_extract(self, url):
        """Return a playlist made of every track listed on the artist page."""
        entries, group, _webpage = self._extract_common(url)
        # NOTE(review): the tests above expect the playlist `id` to be the
        # URL slug and `title` to be the artist name, while the values passed
        # here are the JSON-LD `name` and `description` fields, in that
        # positional order. Confirm the positional parameters of
        # `playlist_result` and the actual content of those fields.
        first_field = group['name']
        second_field = group['description']
        return self.playlist_result(entries, first_field, second_field)


class PalcoMP3VideoIE(PalcoMP3IE):
    """Extractor for the video clip attached to a PalcoMP3 song (`#clipe`).

    The song is resolved through the parent extractor, then the page is
    searched for the list item carrying that song's id together with a
    `data-id-video` attribute; the value of that attribute is handed over to
    the Youtube extractor.
    """

    IE_NAME = 'PalcoMP3:video'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)/#clipe$'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '_pD1nR2qqPg',
            'ext': 'mp4',
            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
            'description': 'md5:739d585d094212b999e507377daa21de',
            'upload_date': '20161107',
            'uploader_id': 'maiaramaraisaoficial',
            'uploader': 'Maiara e Maraisa',
        },
    }, {
        'url': 'https://www.palcomp3.com/mckevinho/dog-vagabundo-mc-phe-cachorrera-part-mc-kevinho/#clipe',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': 'iKVAfp6-o-Q',
            'ext': 'mp4',
            'title': 'MC Phe Cachorrera e MC Kevinho - Dog Vagabundo (Video Clipe) Jorgin Deejhay / HDUC ep.2',
            'description': 'md5:728024b6905a9a321c8c16e1e1985e56',
            'upload_date': '20170208',
            'uploader': 'GR6 EXPLODE',
            'uploader_id': 'gr6explode',
        },
    }]

    # Template for the <li> element of one track; `%s` receives the escaped
    # track id.
    #  * The (?x) flag is the very first thing in the pattern: an inline
    #    global flag that follows other characters (here it used to follow a
    #    newline and indentation) is deprecated and rejected by newer Python
    #    versions.
    #  * Attribute gaps use [^>]* so that a match cannot run past the end of
    #    the <li ...> tag into the attributes of a neighbouring element.
    _VIDEO_RE_TMPL = r'''(?x)
        <li\b[^>]*?\sdata-id="%s"[^>]*?
            \sdata-id-video="(?P<video_id>[^"]+)"
        [^>]*>\s*
        <a\b[^>]*?\shref="[^"]*">\s*
        <span>[^<]*</span>\s*</a>\s*</li>
        '''

    def _real_extract(self, url):
        """Return a `url_transparent` result pointing at the Youtube video.

        Raises ExtractorError when the song cannot be resolved or when the
        page carries no video id for it.
        """
        result = super(PalcoMP3VideoIE, self)._real_extract(url, with_webpage=True)
        # Guard against a parent implementation that returns None for an
        # unknown track; unpacking None would raise an unhelpful TypeError.
        if not result:
            raise ExtractorError('Unable to find the track for this URL')
        track, webpage = result

        video_re = self._VIDEO_RE_TMPL % re.escape('%s' % track['id'])
        mobj = re.search(video_re, webpage)
        if not mobj:
            raise ExtractorError(
                'Unable to find a video clip for this track', expected=True)
        video_id = mobj.group('video_id')

        return {
            '_type': 'url_transparent',
            'ie_key': 'Youtube',
            'id': video_id,
            'url': video_id,
        }
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`import re`

First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			`from ..utils import get_element_by_id, get_element_by_attribute`
			`from .common import InfoExtractor`


			`class PalcoMP3IE(InfoExtractor):`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`IE_NAME = 'PalcoMP3:song'`
			`_VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)/?$'`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`_TESTS = [{`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			`'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'md5': '99fd6405b2d8fd589670f6db1ba3b358',`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			`'info_dict': {`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'id': '3162927',`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			`'ext': 'mp3',`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'display_id': 'nossas-composicoes-cuida-bem-dela',`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			`'title': 'Nossas Composições - CUIDA BEM DELA',`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',`
			`}},`
			`{`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/niveis-da-bebida',`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'md5': '4c4d1e45b5ae49396cfff017eb41cdd9',`
			`'info_dict': {`
			`'id': '2303899',`
			`'ext': 'mp3',`
			`'display_id': 'niveis-da-bebida',`
			`'title': 'NIVEIS DA BEBIDA',`
			`'thumbnail': r'https://studiosol-a.akamaihd.net/tb/80x60/palcomp3-logo/9/d/f/c/356447_20170324175145.jpg',`
			`}},`
			`]`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`def _json_ld(self, json_ld, display_id, fatal=True, expected_type="MusicGroup"):`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00			""" override `common.py:_json_ld` as we just need the
			`_search_json_ld` function to get the JSON, but the original
			`_json_ld` function does not fit us."""
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`return self._parse_json(json_ld, display_id, fatal=fatal)`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`def _extract_common(self, url):`
			`artist_id = self._VALID_URL_RE.match(url).group('artist')`
			`webpage = self._download_webpage(url, artist_id)`
			`self.webpage = webpage`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`ld = self._get_ld_info(webpage, artist_id)`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`tracks = [self._ld_track_process(track, ld) for track in ld['track']]`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00
			`return tracks, ld, webpage`

			`def _real_extract(self, url, with_webpage=False):`
			`display_id = self._match_id(url)`
			`tracks, ld, webpage = self._extract_common(url)`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`for track in tracks:`
			`if track['display_id'] == display_id:`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`if with_webpage:`
			`return track, webpage`
			`else:`
			`return track`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`def _get_ld_info(self, webpage, display_id):`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`player = get_element_by_id('player', webpage) or \`
			`get_element_by_attribute('id', 'player', webpage, escape_value=False)`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`ld = self._search_json_ld(player, display_id, expected_type="MusicGroup")`
			`return ld`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`def _ld_track_process(self, track, ld={'genre': None}):`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`tmin, tsec = re.findall("PT(\d+)M(\d+)S", track['duration'], re.IGNORECASE)[0]`
First PalcoMP3 implementation 2017-05-15 00:04:39 -03:00
			`return {`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'id': track['@id'],`
			`'title': track['name'],`
			`'track': track['name'],`
			`'url': 'https:' + track['audio'],`
			`'webpage_url': 'https://www.palcomp3.com' + track['url'],`
			`'artist': track['byArtist']['name'],`
			`'thumbnail': track['byArtist']['image'],`
			`'display_id': track['url'].split('/')[-2],`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'duration': int(tmin) * 60 + int(tsec),`
PalcoMP3: Getting song fine 2017-05-15 02:37:11 -03:00			`'genre': ld['genre'],`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`}`


			`class PalcoMP3ArtistIE(PalcoMP3IE):`
			`IE_NAME = 'PalcoMP3:artist'`
			`_VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/?$'`
			`_TESTS = [`
			`{`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'url': 'https://www.palcomp3.com/banda5cha/',`
			`'info_dict': {`
			`'id': 'banda5cha',`
			`'title': '5Chá',`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`},`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'playlist_count': 2,`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`},`
finishing; need to clean up 2017-05-18 03:38:20 -03:00			`{`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'url': 'https://www.palcomp3.com/kleijohnata/',`
			`'info_dict': {`
			`'id': 'kleijohnata',`
			`'title': 'KLEIJOHNATA ',`
finishing; need to clean up 2017-05-18 03:38:20 -03:00			`},`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'playlist_count': 4,`
finishing; need to clean up 2017-05-18 03:38:20 -03:00			`},`

Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`# Active famous artist; Very likely to change playlist_count.`
finishing; need to clean up 2017-05-18 03:38:20 -03:00			`# {`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`# 'url': 'https://www.palcomp3.com/maiaraemaraisaoficial',`
			`# 'info_dict': {`
			`# 'id': 'maiaraemaraisaoficial',`
			`# 'title': 'Maiara e Maraisa Oficial',`
finishing; need to clean up 2017-05-18 03:38:20 -03:00			`# },`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`# 'playlist_count': 8,`
finishing; need to clean up 2017-05-18 03:38:20 -03:00			`# },`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`]`

			`def _real_extract(self, url):`
			`tracks, ld, _ = self._extract_common(url)`
			`return self.playlist_result(tracks, ld['name'], ld['description'])`


			`class PalcoMP3VideoIE(PalcoMP3IE):`
			`IE_NAME = 'PalcoMP3:video'`
			`_VALID_URL = r'https?://(?:www\.)?palcomp3\.com/(?P<artist>[^/]+)/(?P<id>[^/]+)/#clipe$'`
			`_TESTS = [`
			`{`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',`
			`'add_ie': ['Youtube'],`
			`'info_dict': {`
			`'id': '_pD1nR2qqPg',`
			`'ext': 'mp4',`
			`'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',`
			`'description': 'md5:739d585d094212b999e507377daa21de',`
			`'upload_date': '20161107',`
			`'uploader_id': 'maiaramaraisaoficial',`
			`'uploader': 'Maiara e Maraisa',`
			`}`
			`},`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`{`
Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`'url': 'https://www.palcomp3.com/mckevinho/dog-vagabundo-mc-phe-cachorrera-part-mc-kevinho/#clipe',`
			`'add_ie': ['Youtube'],`
			`'info_dict': {`
			`'id': 'iKVAfp6-o-Q',`
			`'ext': 'mp4',`
			`'title': 'MC Phe Cachorrera e MC Kevinho - Dog Vagabundo (Video Clipe) Jorgin Deejhay / HDUC ep.2',`
			`'description': 'md5:728024b6905a9a321c8c16e1e1985e56',`
			`'upload_date': '20170208',`
			`'uploader': 'GR6 EXPLODE',`
			`'uploader_id': 'gr6explode',`
			`}`
			`},`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`]`

			`def _real_extract(self, url):`
			`track, webpage = super(PalcoMP3VideoIE, self)._real_extract(url, with_webpage=True)`

			`video_re = r"""`
			`(?x)`
			`<li (.?) data-id="{}" (.?)`
			`data-id-video="(?P<video_id>[^"]+?)"`
			`(.*?) >`
			`<a (.?) href="([^"]?)">`
			`<span>([^<]*?)</span></a></li>`
			`""".format(track['id'])`

Fix flask8 issues. Ready to pull 2017-05-18 04:16:38 -03:00			`m = re.search(video_re, webpage)`
Add PalcoMP3 Video extractor 2017-05-15 04:40:20 -03:00			`if not m:`
			`return None`
			`video_id = m.group('video_id')`

			`return {`
			`'_type': 'url_transparent',`
			`'ie_key': 'Youtube',`
			`'id': video_id,`
			`'url': video_id,`
			`}`