[Bandcamp] Extract additional fields (#1)

* [bandcamp] Add additional extraction fields
  Adding additional extraction fields
* [bandcamp] remove redundant changes
  Removed redundant changes
* [bandcamp] Remove extra spaces
  Removed extra spaces
* [bandcamp] Minor corrections
  Correct order of return fields, minimize coding lines
2017-01-25 10:49:24 -06:00 · 2017-01-25 10:49:24 -06:00 · 1db3d48218
commit 1db3d48218
parent 1f393a3241
1 changed file with 24 additions and 0 deletions
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -47,6 +47,15 @@ class BandcampIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        album = self._search_regex(
            r'(?ms).*?title\s*?:\s*?"(?P<album>.*?)",',
            webpage, 'album')
        album_artist = self._search_regex(
            r'(?ms)var EmbedData = .*?[{,]\s*artist:\s*?"(?P<album_artist>.*?)",$',
            webpage, 'album artist')
        release_year = self._search_regex(
            r'(?ms).*?release_date"?:\s*?"\d+ \w+ (?P<release_year>\d+)\s*?.*?GMT",',
            webpage, 'release year')
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if not m_download:
            m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
@@ -77,6 +86,12 @@ class BandcampIE(InfoExtractor):
                    'title': data['title'],
                    'formats': formats,
                    'duration': float_or_none(data.get('duration')),
                    'track': data['title'],
                    'track_number': data['track_num'],
                    'track_id': track_id,
                    'album': album,
                    'album_artist': album_artist,
                    'release_year': release_year,
                }
            else:
                raise ExtractorError('No free songs found')
@@ -86,6 +101,10 @@ class BandcampIE(InfoExtractor):
            r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
            webpage, 'video id')
        track_number = self._search_regex(
            r'"track_num":(?P<track_number>\d+),',
            webpage, 'track number')
        download_webpage = self._download_webpage(
            download_link, video_id, 'Downloading free downloads page')
@@ -148,6 +167,10 @@ class BandcampIE(InfoExtractor):
            'artist': artist,
            'track': track,
            'formats': formats,
            'track_number': track_number,
            'album': album,
            'album_artist': album_artist,
            'release_year': release_year,
        }
@@ -233,5 +256,6 @@ class BandcampAlbumIE(InfoExtractor):
            'uploader_id': uploader_id,
            'id': playlist_id,
            'title': title,
            'album': title,
            'entries': entries,
        }