[Bandcamp] Extract additional fields (#1)

* [bandcamp] Add additional extraction fields

Adding additional extraction fields

* [bandcamp] remove redundant changes

Removed redundant changes

* [bandcamp] Remove extra spaces

Removed extra spaces

* [bandcamp] Minor corrections

Correct the order of the returned fields and reduce the number of lines of code
This commit is contained in:
gkoelln 2017-01-25 10:49:24 -06:00 committed by GitHub
parent 1f393a3241
commit 1db3d48218

View File

@@ -47,6 +47,15 @@ class BandcampIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
title = mobj.group('title') title = mobj.group('title')
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
album = self._search_regex(
r'(?ms).*?title\s*?:\s*?"(?P<album>.*?)",',
webpage, 'album')
album_artist = self._search_regex(
r'(?ms)var EmbedData = .*?[{,]\s*artist:\s*?"(?P<album_artist>.*?)",$',
webpage, 'album artist')
release_year = self._search_regex(
r'(?ms).*?release_date"?:\s*?"\d+ \w+ (?P<release_year>\d+)\s*?.*?GMT",',
webpage, 'release year')
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if not m_download: if not m_download:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
@@ -77,6 +86,12 @@ class BandcampIE(InfoExtractor):
'title': data['title'], 'title': data['title'],
'formats': formats, 'formats': formats,
'duration': float_or_none(data.get('duration')), 'duration': float_or_none(data.get('duration')),
'track': data['title'],
'track_number': data['track_num'],
'track_id': track_id,
'album': album,
'album_artist': album_artist,
'release_year': release_year,
} }
else: else:
raise ExtractorError('No free songs found') raise ExtractorError('No free songs found')
@@ -86,6 +101,10 @@ class BandcampIE(InfoExtractor):
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'video id') webpage, 'video id')
track_number = self._search_regex(
r'"track_num":(?P<track_number>\d+),',
webpage, 'track number')
download_webpage = self._download_webpage( download_webpage = self._download_webpage(
download_link, video_id, 'Downloading free downloads page') download_link, video_id, 'Downloading free downloads page')
@@ -148,6 +167,10 @@ class BandcampIE(InfoExtractor):
'artist': artist, 'artist': artist,
'track': track, 'track': track,
'formats': formats, 'formats': formats,
'track_number': track_number,
'album': album,
'album_artist': album_artist,
'release_year': release_year,
} }
@@ -233,5 +256,6 @@ class BandcampAlbumIE(InfoExtractor):
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'id': playlist_id, 'id': playlist_id,
'title': title, 'title': title,
'album': title,
'entries': entries, 'entries': entries,
} }