[freesound] Improve metadata extraction

This commit is contained in:
Déstin Reed 2016-10-01 15:12:45 +02:00
parent 63e0fd5bcc
commit 6bd7fb9df5

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import get_element_by_id
class FreesoundIE(InfoExtractor): class FreesoundIE(InfoExtractor):
@@ -20,20 +21,18 @@ class FreesoundIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) music_id = self._match_id(url)
music_id = mobj.group('id')
webpage = self._download_webpage(url, music_id) webpage = self._download_webpage(url, music_id)
title = self._html_search_regex(
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>', title = self._og_search_property('audio:title', webpage)
webpage, 'music title', flags=re.DOTALL) description = re.sub(r'</?p>', '', get_element_by_id('sound_description',
description = self._html_search_regex( webpage).strip())
r'<div id="sound_description">(.*?)</div>', webpage, 'description',
fatal=False, flags=re.DOTALL)
return { return {
'id': music_id, 'id': music_id,
'title': title, 'title': title,
'url': self._og_search_property('audio', webpage, 'music url'), 'url': self._og_search_property('audio', webpage, 'music url'),
'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'), 'uploader': self._og_search_property('audio:artist', webpage,
'music uploader', fatal=False),
'description': description, 'description': description,
} }