Add initial extractor for Matter Online

2020-02-27 18:52:48 -08:00 · 2020-02-27 18:52:48 -08:00 · a7ca0f9303
commit a7ca0f9303
parent bee6451fe8
2 changed files with 49 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -601,6 +601,7 @@ from .markiza import (
 )
 from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .matter import MatterIE
 from .mdr import MDRIE
 from .mediaset import MediasetIE
 from .mediasite import (
--- a/youtube_dl/extractor/matter.py
+++ b/youtube_dl/extractor/matter.py
@@ -0,0 +1,48 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class MatterIE(InfoExtractor):
    """
    InfoExtractor for single tracks hosted on Matter (app.matter.online).

    A track URL is mapped to its numeric id, the embeddable open-graph page
    for that id is downloaded from the Matter API host, and the audio URL,
    title, artist name and artwork are scraped from that page's HTML.

    This class should be used to handle tracks. Another class (TODO) will be
    used to implement playlists or other content.
    """
    # Dots are escaped so that only the literal host name is accepted; an
    # unescaped "." would also match look-alikes such as "appXmatter.online".
    _VALID_URL = r'https?://app\.matter\.online/tracks/(?P<id>\d+)/?'
    # A list of test-case dicts (the dict form belongs to the singular _TEST).
    _TESTS = [
        # TODO: Implement
    ]

    def _real_extract(self, url):
        """
        Build the info dict for the track addressed by *url*.

        Returns a dict with the keys 'id', 'url', 'title', 'uploader' and
        'thumbnail'. 'uploader' and 'thumbnail' are None when the matching
        markup is absent from the embedded page; a missing audio source or
        title is still reported as an extraction failure.
        """
        track_id = self._match_id(url)
        webpage = self._download_webpage(
            "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" % track_id, track_id
        )

        # Mandatory fields: without these the result is unusable, so the
        # default fatal behaviour of _html_search_regex is kept.
        download_url = self._html_search_regex(
            r'<source src="(https://matter-production\.s3\.amazonaws\.com/audios/[^\.]+\.[^"]+)"/>',
            webpage, "download_url"
        )
        title = self._html_search_regex(
            r'<a href="https://app\.matter\.online/tracks/\d+" target="[^"]+">([^<]+)</a>',
            webpage, "title"
        )

        # Optional metadata: a layout change or a track without artwork must
        # not prevent the download, so these lookups are non-fatal.
        author = self._html_search_regex(
            r'<a href="https://app\.matter\.online/artists/user_\d+" target="[^"]+">([^<]+)</a>',
            webpage, "author", fatal=False
        )
        artwork = self._html_search_regex(
            r'style="background: url\((https://matter-production\.s3\.amazonaws\.com/images/[^\.]+\.[^\)]+)\)',
            webpage, "artwork", fatal=False
        )

        return {
            'id': track_id,
            'url': download_url,
            'title': title,
            'uploader': author,
            'thumbnail': artwork,
        }