Merge branch 'master' of github.com:rg3/youtube-dl

* 'master' of github.com:rg3/youtube-dl: [dramafever] Remove extractor(closes #20868) [adn] fix subtitle extraction(#12724) [youtube] extract album from Music in this video section(#20301) [ccc] Improve extraction (closes #14601, closes #20355) [ccc] Extract creator [ccc:playlist] Add extractor [sverigesradio] improve extraction(closes #18635) [sverigesradio] Add extractor [cinemax] Add new extractor [sixplay] add missing parenthesis [sixplay] try to extract non drm protected manifests(closes #20849) [youtube] improve Youtube Music Auto-generated description parsing(closes #20742) [youtube] Extract additional meta data from video description on youtube music videos
2019-04-28 21:23:19 -07:00 · 2019-04-28 21:23:19 -07:00 · 35c3104d7b
commit 35c3104d7b
parent 41640223b4 6e07b5a6d5
9 changed files with 331 additions and 294 deletions
--- a/youtube_dl/extractor/adn.py
+++ b/youtube_dl/extractor/adn.py
@ -65,14 +65,15 @@ class ADNIE(InfoExtractor):
        if subtitle_location:
            enc_subtitles = self._download_webpage(
                urljoin(self._BASE_URL, subtitle_location),
-                video_id, 'Downloading subtitles data', fatal=False)
+                video_id, 'Downloading subtitles data', fatal=False,
+                headers={'Origin': 'https://animedigitalnetwork.fr'})
        if not enc_subtitles:
            return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(binascii.unhexlify(self._K + '4421de0a5f0814ba')),
+            bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
            bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ))
        subtitles_json = self._parse_json(
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@ -1,9 +1,12 @@
+# coding: utf-8
 from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    parse_iso8601,
+    try_get,
+    url_or_none,
 )


@ -18,11 +21,13 @@ class CCCIE(InfoExtractor):
            'id': '1839',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
+            'creator': 'byterazor',
            'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20131228',
            'timestamp': 1388188800,
            'duration': 3710,
+            'tags': list,
        }
    }, {
        'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@ -68,6 +73,7 @@ class CCCIE(InfoExtractor):
            'id': event_id,
            'display_id': display_id,
            'title': event_data['title'],
+            'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
            'description': event_data.get('description'),
            'thumbnail': event_data.get('thumb_url'),
            'timestamp': parse_iso8601(event_data.get('date')),
@ -75,3 +81,31 @@ class CCCIE(InfoExtractor):
            'tags': event_data.get('tags'),
            'formats': formats,
        }
+
+
+class CCCPlaylistIE(InfoExtractor):  # playlist extractor for media.ccc.de conference pages (/c/<id>)
+    IE_NAME = 'media.ccc.de:lists'
+    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'  # 'id' group is the path segment after /c/
+    _TESTS = [{
+        'url': 'https://media.ccc.de/c/30c3',
+        'info_dict': {
+            'title': '30C3',
+            'id': '30c3',
+        },
+        'playlist_count': 135,
+    }]
+
+    def _real_extract(self, url):  # returns a playlist result holding one url_result entry per listed event
+        playlist_id = self._match_id(url).lower()  # lower-cased before being appended to the conferences API URL
+
+        conf = self._download_json(
+            'https://media.ccc.de/public/conferences/' + playlist_id,
+            playlist_id)
+
+        entries = []
+        for e in conf['events']:  # 'events' is required: a missing key raises KeyError
+            event_url = url_or_none(e.get('frontend_link'))
+            if event_url:  # events for which url_or_none yields nothing are skipped
+                entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))  # each entry is delegated to CCCIE
+
+        return self.playlist_result(entries, playlist_id, conf.get('title'))  # title is optional (None when absent)
--- a/youtube_dl/extractor/cinemax.py
+++ b/youtube_dl/extractor/cinemax.py
@ -0,0 +1,29 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .hbo import HBOBaseIE
+
+
+class CinemaxIE(HBOBaseIE):  # cinemax.com video extractor built on HBOBaseIE._extract_info
+    _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'  # 'path' spans the URL path through the numeric id; 'id' is the trailing digits
+    _TESTS = [{
+        'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
+        'md5': '82e0734bba8aa7ef526c9dd00cf35a05',
+        'info_dict': {
+            'id': '20126903',
+            'ext': 'mp4',
+            'title': 'S1 Ep 1: Recap',
+        },
+        'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+    }, {
+        'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):  # returns the info dict produced by _extract_info with 'id' replaced
+        path, video_id = re.match(self._VALID_URL, url).groups()  # groups come back in pattern order: (path, id)
+        info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)  # the XML document is requested at <path>.xml
+        info['id'] = video_id  # force the id to the one taken from the URL, whatever _extract_info stored there
+        return info
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@ -1,266 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import json
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_urlparse,
-)
-from ..utils import (
-    clean_html,
-    ExtractorError,
-    int_or_none,
-    parse_age_limit,
-    parse_duration,
-    unified_timestamp,
-    url_or_none,
-)
-
-
-class DramaFeverBaseIE(InfoExtractor):
-    _NETRC_MACHINE = 'dramafever'
-
-    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
-
-    _consumer_secret = None
-
-    def _get_consumer_secret(self):
-        mainjs = self._download_webpage(
-            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
-            None, 'Downloading main.js', fatal=False)
-        if not mainjs:
-            return self._CONSUMER_SECRET
-        return self._search_regex(
-            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
-            'consumer secret', default=self._CONSUMER_SECRET)
-
-    def _real_initialize(self):
-        self._consumer_secret = self._get_consumer_secret()
-        self._login()
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
-        login_form = {
-            'username': username,
-            'password': password,
-        }
-
-        try:
-            response = self._download_json(
-                'https://www.dramafever.com/api/users/login', None, 'Logging in',
-                data=json.dumps(login_form).encode('utf-8'), headers={
-                    'x-consumer-key': self._consumer_secret,
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
-                response = self._parse_json(
-                    e.cause.read().decode('utf-8'), None)
-            else:
-                raise
-
-        # Successful login
-        if response.get('result') or response.get('guid') or response.get('user_guid'):
-            return
-
-        errors = response.get('errors')
-        if errors and isinstance(errors, list):
-            error = errors[0]
-            message = error.get('message') or error['reason']
-            raise ExtractorError('Unable to login: %s' % message, expected=True)
-        raise ExtractorError('Unable to log in')
-
-
-class DramaFeverIE(DramaFeverBaseIE):
-    IE_NAME = 'dramafever'
-    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
-    _TESTS = [{
-        'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
-        'info_dict': {
-            'id': '4274.1',
-            'ext': 'wvm',
-            'title': 'Heirs - Episode 1',
-            'description': 'md5:362a24ba18209f6276e032a651c50bc2',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 3783,
-            'timestamp': 1381354993,
-            'upload_date': '20131009',
-            'series': 'Heirs',
-            'season_number': 1,
-            'episode': 'Episode 1',
-            'episode_number': 1,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
-        'info_dict': {
-            'id': '4826.4',
-            'ext': 'flv',
-            'title': 'Mnet Asian Music Awards 2015',
-            'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
-            'episode': 'Mnet Asian Music Awards 2015 - Part 3',
-            'episode_number': 4,
-            'thumbnail': r're:^https?://.*\.jpg',
-            'timestamp': 1450213200,
-            'upload_date': '20151215',
-            'duration': 5359,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
-        'only_matching': True,
-    }]
-
-    def _call_api(self, path, video_id, note, fatal=False):
-        return self._download_json(
-            'https://www.dramafever.com/api/5/' + path,
-            video_id, note=note, headers={
-                'x-consumer-key': self._consumer_secret,
-            }, fatal=fatal)
-
-    def _get_subtitles(self, video_id):
-        subtitles = {}
-        subs = self._call_api(
-            'video/%s/subtitles/webvtt/' % video_id, video_id,
-            'Downloading subtitles JSON', fatal=False)
-        if not subs or not isinstance(subs, list):
-            return subtitles
-        for sub in subs:
-            if not isinstance(sub, dict):
-                continue
-            sub_url = url_or_none(sub.get('url'))
-            if not sub_url:
-                continue
-            subtitles.setdefault(
-                sub.get('code') or sub.get('language') or 'en', []).append({
-                    'url': sub_url
-                })
-        return subtitles
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url).replace('/', '.')
-
-        series_id, episode_number = video_id.split('.')
-
-        video = self._call_api(
-            'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
-            'Downloading video JSON')
-
-        formats = []
-        download_assets = video.get('download_assets')
-        if download_assets and isinstance(download_assets, dict):
-            for format_id, format_dict in download_assets.items():
-                if not isinstance(format_dict, dict):
-                    continue
-                format_url = url_or_none(format_dict.get('url'))
-                if not format_url:
-                    continue
-                formats.append({
-                    'url': format_url,
-                    'format_id': format_id,
-                    'filesize': int_or_none(video.get('filesize')),
-                })
-
-        stream = self._call_api(
-            'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
-            fatal=False)
-        if stream:
-            stream_url = stream.get('stream_url')
-            if stream_url:
-                formats.extend(self._extract_m3u8_formats(
-                    stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)
-
-        title = video.get('title') or 'Episode %s' % episode_number
-        description = video.get('description')
-        thumbnail = video.get('thumbnail')
-        timestamp = unified_timestamp(video.get('release_date'))
-        duration = parse_duration(video.get('duration'))
-        age_limit = parse_age_limit(video.get('tv_rating'))
-        series = video.get('series_title')
-        season_number = int_or_none(video.get('season'))
-
-        if series:
-            title = '%s - %s' % (series, title)
-
-        subtitles = self.extract_subtitles(video_id)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'age_limit': age_limit,
-            'series': series,
-            'season_number': season_number,
-            'episode_number': int_or_none(episode_number),
-            'formats': formats,
-            'subtitles': subtitles,
-        }
-
-
-class DramaFeverSeriesIE(DramaFeverBaseIE):
-    IE_NAME = 'dramafever:series'
-    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
-    _TESTS = [{
-        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
-        'info_dict': {
-            'id': '4512',
-            'title': 'Cooking with Shin',
-            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
-        },
-        'playlist_count': 4,
-    }, {
-        'url': 'http://www.dramafever.com/drama/124/IRIS/',
-        'info_dict': {
-            'id': '124',
-            'title': 'IRIS',
-            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
-        },
-        'playlist_count': 20,
-    }]
-
-    _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
-
-    def _real_extract(self, url):
-        series_id = self._match_id(url)
-
-        series = self._download_json(
-            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
-            % (self._consumer_secret, series_id),
-            series_id, 'Downloading series JSON')['series'][series_id]
-
-        title = clean_html(series['name'])
-        description = clean_html(series.get('description') or series.get('description_short'))
-
-        entries = []
-        for page_num in itertools.count(1):
-            episodes = self._download_json(
-                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
-                % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
-                series_id, 'Downloading episodes JSON page #%d' % page_num)
-            for episode in episodes.get('value', []):
-                episode_url = episode.get('episode_url')
-                if not episode_url:
-                    continue
-                entries.append(self.url_result(
-                    compat_urlparse.urljoin(url, episode_url),
-                    'DramaFever', episode.get('guid')))
-            if page_num == episodes['num_pages']:
-                break
-
-        return self.playlist_result(entries, series_id, title, description)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -177,7 +177,10 @@ from .cbsnews import (
    CBSNewsLiveVideoIE,
 )
 from .cbssports import CBSSportsIE
-from .ccc import CCCIE
+from .ccc import (
+    CCCIE,
+    CCCPlaylistIE,
+)
 from .ccma import CCMAIE
 from .cctv import CCTVIE
 from .cda import CDAIE
@ -194,6 +197,7 @@ from .chirbit import (
    ChirbitProfileIE,
 )
 from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
 from .ciscolive import (
    CiscoLiveSessionIE,
    CiscoLiveSearchIE,
@ -283,10 +287,6 @@ from .dplay import (
    DPlayIE,
    DPlayItIE,
 )
-from .dramafever import (
-    DramaFeverIE,
-    DramaFeverSeriesIE,
-)
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
@ -1097,6 +1097,10 @@ from .streetvoice import StreetVoiceIE
 from .stretchinternet import StretchInternetIE
 from .stv import STVPlayerIE
 from .sunporno import SunPornoIE
+from .sverigesradio import (
+    SverigesRadioEpisodeIE,
+    SverigesRadioPublicationIE,
+)
 from .svt import (
    SVTIE,
    SVTPageIE,
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@ -13,19 +13,7 @@ from ..utils import (
 )


-class HBOIE(InfoExtractor):
-    IE_NAME = 'hbo'
-    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
-        'md5': '8126210656f433c452a21367f9ad85b3',
-        'info_dict': {
-            'id': '22113301',
-            'ext': 'mp4',
-            'title': 'Game of Thrones - Trailer',
-        },
-        'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
-    }
+class HBOBaseIE(InfoExtractor):
    _FORMATS_INFO = {
        'pro7': {
            'width': 1280,
@ -65,12 +53,8 @@ class HBOIE(InfoExtractor):
        },
    }

-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        location_path = self._parse_json(self._html_search_regex(
-            r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
-        video_data = self._download_xml(urljoin(url, location_path), display_id)
+    def _extract_info(self, url, display_id):
+        video_data = self._download_xml(url, display_id)
        video_id = xpath_text(video_data, 'id', fatal=True)
        episode_title = title = xpath_text(video_data, 'title', fatal=True)
        series = xpath_text(video_data, 'program')
@ -167,3 +151,25 @@ class HBOIE(InfoExtractor):
            'thumbnails': thumbnails,
            'subtitles': subtitles,
        }
+
+
+class HBOIE(HBOBaseIE):  # hbo.com video/embed page extractor; XML handling lives in HBOBaseIE._extract_info
+    IE_NAME = 'hbo'
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'  # 'id' is the last path segment
+    _TEST = {
+        'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
+        'md5': '8126210656f433c452a21367f9ad85b3',
+        'info_dict': {
+            'id': '22113301',
+            'ext': 'mp4',
+            'title': 'Game of Thrones - Trailer',
+        },
+        'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+    }
+
+    def _real_extract(self, url):  # returns whatever _extract_info builds for the page's video location
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        location_path = self._parse_json(self._html_search_regex(
+            r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']  # JSON comes from the page's data-state attribute; both keys are required
+        return self._extract_info(urljoin(url, location_path), display_id)  # locationUrl is resolved against the page URL
--- a/youtube_dl/extractor/sixplay.py
+++ b/youtube_dl/extractor/sixplay.py
@ -65,7 +65,7 @@ class SixPlayIE(InfoExtractor):
        for asset in assets:
            asset_url = asset.get('full_physical_path')
            protocol = asset.get('protocol')
-            if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
+            if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
                continue
            urls.append(asset_url)
            container = asset.get('video_container')
@ -82,6 +82,7 @@ class SixPlayIE(InfoExtractor):
                        if not urlh:
                            continue
                        asset_url = urlh.geturl()
+                    asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
                    for i in range(3, 0, -1):
                        asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
                        m3u8_formats = self._extract_m3u8_formats(
--- a/youtube_dl/extractor/sverigesradio.py
+++ b/youtube_dl/extractor/sverigesradio.py
@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    str_or_none,
+)
+
+
+class SverigesRadioBaseIE(InfoExtractor):  # shared extraction logic; reads self._AUDIO_TYPE, which this class itself does not define
+    _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'  # prefix for the 'audiometadata' and 'getaudiourl' requests below
+    _QUALITIES = ['low', 'medium', 'high']  # each value is sent as the 'quality' query parameter, in this order
+    _EXT_TO_CODEC_MAP = {  # file extension of the audio URL -> 'acodec' value
+        'mp3': 'mp3',
+        'm4a': 'aac',
+    }
+    _CODING_FORMAT_TO_ABR_MAP = {  # 'codingFormat' from the getaudiourl response -> 'abr' fallback value
+        5: 128,
+        11: 192,
+        12: 32,
+        13: 96,
+    }
+
+    def _real_extract(self, url):  # returns an info dict with audio-only formats, one per distinct audio URL
+        audio_id = self._match_id(url)
+        query = {
+            'id': audio_id,
+            'type': self._AUDIO_TYPE,
+        }
+
+        item = self._download_json(
+            self._BASE_URL + 'audiometadata', audio_id,
+            'Downloading audio JSON metadata', query=query)['items'][0]  # only the first item is used
+        title = item['subtitle']  # required: the item's 'subtitle' is the title, its 'title' becomes 'series' below
+
+        query['format'] = 'iis'  # the same query dict is reused for the getaudiourl requests
+        urls = []  # audio URLs already added, used to drop duplicates across qualities
+        formats = []
+        for quality in self._QUALITIES:
+            query['quality'] = quality
+            audio_url_data = self._download_json(
+                self._BASE_URL + 'getaudiourl', audio_id,
+                'Downloading %s format JSON metadata' % quality,
+                fatal=False, query=query) or {}  # a falsy result becomes {} so the lookups below yield None
+            audio_url = audio_url_data.get('audioUrl')
+            if not audio_url or audio_url in urls:  # skip missing URLs and ones already seen
+                continue
+            urls.append(audio_url)
+            ext = determine_ext(audio_url)
+            coding_format = audio_url_data.get('codingFormat')
+            abr = int_or_none(self._search_regex(
+                r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
+                default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)  # prefer the number in the URL, else the codingFormat table
+            formats.append({
+                'abr': abr,
+                'acodec': self._EXT_TO_CODEC_MAP.get(ext),  # None for extensions not in the map
+                'ext': ext,
+                'format_id': str_or_none(coding_format),
+                'vcodec': 'none',
+                'url': audio_url,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': audio_id,
+            'title': title,
+            'formats': formats,
+            'series': item.get('title'),
+            'duration': int_or_none(item.get('duration')),
+            'thumbnail': item.get('displayimageurl'),
+            'description': item.get('description'),
+        }
+
+
+class SverigesRadioPublicationIE(SverigesRadioBaseIE):  # artikel.aspx / gruppsida.aspx URLs; extraction is inherited from SverigesRadioBaseIE
+    IE_NAME = 'sverigesradio:publication'
+    _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'  # 'id' is the numeric 'artikel' query value
+    _TESTS = [{
+        'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
+        'md5': '6a4917e1923fccb080e5a206a5afa542',
+        'info_dict': {
+            'id': '7038546',
+            'ext': 'm4a',
+            'duration': 132,
+            'series': 'Nyheter (Ekot)',
+            'title': 'Esa Teittinen: Sanningen har inte kommit fram',
+            'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
+        'only_matching': True,
+    }]
+    _AUDIO_TYPE = 'publication'  # sent by the base class as the 'type' query parameter
+
+
+class SverigesRadioEpisodeIE(SverigesRadioBaseIE):  # /avsnitt/<id> URLs; extraction is inherited from SverigesRadioBaseIE
+    IE_NAME = 'sverigesradio:episode'
+    _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'  # 'id' is the number after /avsnitt/
+    _TEST = {
+        'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
+        'md5': '20dc4d8db24228f846be390b0c59a07c',
+        'info_dict': {
+            'id': '1140922',
+            'ext': 'mp3',
+            'duration': 3307,
+            'series': 'Konflikt',
+            'title': 'Metoo och valen',
+            'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
+            'thumbnail': r're:^https?://.*\.jpg',
+        }
+    }
+    _AUDIO_TYPE = 'episode'  # sent by the base class as the 'type' query parameter
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -908,6 +908,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
                'skip_download': True,
@ -1086,7 +1087,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'skip_download': True,
                'youtube_include_dash_manifest': False,
            },
-        }
+        },
+        {
+            # Youtube Music Auto-generated description
+            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+            'info_dict': {
+                'id': 'MgNrAu2pzNs',
+                'ext': 'mp4',
+                'title': 'Voyeur Girl',
+                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+                'upload_date': '20190312',
+                'uploader': 'Various Artists - Topic',
+                'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+                'artist': 'Stephen',
+                'track': 'Voyeur Girl',
+                'album': 'it\'s too much love to know my dear',
+                'release_date': '20190313',
+                'release_year': 2019,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Youtube Music Auto-generated description
+            # Retrieve 'artist' field from 'Artist:' in video description
+            # when it is present on youtube music video
+            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
+            'info_dict': {
+                'id': 'k0jLE7tTwjY',
+                'ext': 'mp4',
+                'title': 'Latch Feat. Sam Smith',
+                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
+                'upload_date': '20150110',
+                'uploader': 'Various Artists - Topic',
+                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
+                'artist': 'Disclosure',
+                'track': 'Latch Feat. Sam Smith',
+                'album': 'Latch Featuring Sam Smith',
+                'release_date': '20121008',
+                'release_year': 2012,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Youtube Music Auto-generated description
+            # handle multiple artists on youtube music video
+            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
+            'info_dict': {
+                'id': '74qn0eJSjpA',
+                'ext': 'mp4',
+                'title': 'Eastside',
+                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
+                'upload_date': '20180710',
+                'uploader': 'Benny Blanco - Topic',
+                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
+                'artist': 'benny blanco, Halsey, Khalid',
+                'track': 'Eastside',
+                'album': 'Eastside',
+                'release_date': '20180713',
+                'release_year': 2018,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # Youtube Music Auto-generated description
+            # handle youtube music video with release_year and no release_date
+            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
+            'info_dict': {
+                'id': '-hcAI0g-f5M',
+                'ext': 'mp4',
+                'title': 'Put It On Me',
+                'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+                'upload_date': '20180426',
+                'uploader': 'Matt Maeson - Topic',
+                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
+                'artist': 'Matt Maeson',
+                'track': 'Put It On Me',
+                'album': 'The Hearse',
+                'release_date': None,
+                'release_year': 2018,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
    ]

    def __init__(self, *args, **kwargs):
@ -2073,6 +2162,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

        track = extract_meta('Song')
        artist = extract_meta('Artist')
+        album = extract_meta('Album')
+
+        # Youtube Music Auto-generated description: parse track/artist/album and release info from the "Provided to YouTube by" text
+        release_date = release_year = None
+        if video_description:
+            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+            if mobj:
+                if not track:  # values already set before this block take precedence
+                    track = mobj.group('track').strip()
+                if not artist:  # prefer the explicit "Artist:" line, else join the '·'-separated names
+                    artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
+                if not album:
+                    album = mobj.group('album').strip()  # strip the captured text; previously .strip() was applied to the group name
+                release_year = mobj.group('release_year')  # None when the optional ℗ part did not match
+                release_date = mobj.group('release_date')  # None when there is no "Released on:" line
+                if release_date:
+                    release_date = release_date.replace('-', '')  # YYYY-MM-DD -> YYYYMMDD
+                    if not release_year:  # fall back to the year of the release date
+                        release_year = int(release_date[:4])
+                if release_year:
+                    release_year = int(release_year)

        m_episode = re.search(
            r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
@ -2226,6 +2336,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            'episode_number': episode_number,
            'track': track,
            'artist': artist,
+            'album': album,
+            'release_date': release_date,
+            'release_year': release_year,
        }