Merge pull request #1 from dabiboo/universalmusicfrance

Universalmusicfrance
This commit is contained in:
dabiboo 2015-09-20 10:10:23 +02:00
commit 935b9548dc
3 changed files with 177 additions and 67 deletions

View File

@ -687,6 +687,7 @@ from .udemy import (
) )
from .udn import UDNEmbedIE from .udn import UDNEmbedIE
from .ultimedia import UltimediaIE from .ultimedia import UltimediaIE
from .universalmusicfrance import UniversalMusicFranceIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .urort import UrortIE from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE from .ustream import UstreamIE, UstreamChannelIE

View File

@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import (
urlencode_postdata,
)
class UniversalMusicFranceIE(InfoExtractor):
    """Information extractor for artist video pages on www.universalmusic.fr.

    The video page exposes a player URL in an inline ``urlVideo`` JavaScript
    variable.  That URL is POSTed to ``GET_TOKEN_URL``; the ``video`` key of
    the JSON answer is then handed to the m3u8 format extractor.
    """

    # The id is the last path segment.  An optional ``.iframe`` suffix (used
    # by embedded players) and any query string or fragment (for example
    # ``#contentPart``) are deliberately kept out of the captured id.
    _VALID_URL = r'https?://www\.universalmusic\.fr/artiste/[^/]+/videos/(?P<id>[^/?#]+?)(?:\.iframe)?(?:[/?#]|$)'

    # NOTE(review): all three tests carry the same md5 and the first two
    # share a title that looks copied from the id; only the malformed
    # trailing ')' of the md5 values and the id of the first test (which
    # follows from its URL) were corrected here -- confirm the remaining
    # expectations against the live site.
    _TESTS = [
        {
            'url': 'http://www.universalmusic.fr/artiste/7415-anna-bergendahl/videos/4555-for-you-remix-lyric-video.iframe',
            'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc',
            'info_dict': {
                'id': '4555-for-you-remix-lyric-video',
                'ext': 'mp4',
                'title': '1881-waiting-for-love-lyric-video',
            },
        },
        {
            'url': 'https://www.universalmusic.fr/artiste/4428-avicii/videos/1881-waiting-for-love-lyric-video#contentPart',
            'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc',
            'info_dict': {
                'id': '1881-waiting-for-love-lyric-video',
                'ext': 'mp4',
                'title': '1881-waiting-for-love-lyric-video',
            },
        },
        {
            # from http://www.wat.tv/video/anna-bergendahl-for-you-2015-7dvjn_76lkz_.html
            'url': 'http://www.universalmusic.fr/artiste/7415-anna-bergendahl/videos/4555-for-you-remix-lyric-video',
            'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc',
            'info_dict': {
                'id': '4555-for-you-remix-lyric-video',
                'ext': 'mp4',
                'title': 'anna-bergendahl - for-you',
            },
        },
    ]

    # Endpoint that receives the player URL and answers with a JSON document.
    GET_TOKEN_URL = 'http://www.universalmusic.fr/_/artiste/video/token'

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The page is scraped for the ``urlVideo`` variable and the
        ``og:title`` meta tag; the former is exchanged at ``GET_TOKEN_URL``
        for a manifest URL from which the formats are extracted.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Non-greedy captures: stop at the first closing delimiter instead of
        # running to the last one found on the same line.
        video_url = self._html_search_regex(
            r'var urlVideo = \'(.*?)\';', webpage, 'urlVideo')
        title = self._html_search_regex(
            r'<meta property="?og:title"? content="(.*?)"/>', webpage, 'title')

        # The request carries the same headers as an XMLHttpRequest form post.
        request = compat_urllib_request.Request(
            self.GET_TOKEN_URL, urlencode_postdata({'videoUrl': video_url}))
        request.add_header(
            'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        token_json = self._download_webpage(
            request, video_id,
            note='Getting token', errnote='unable to get token')
        manifest_url = self._parse_json(token_json, video_id).get('video')

        return {
            'id': video_id,
            'title': title,
            # No separate description is scraped; the title is reused.
            'description': title,
            'formats': self._extract_m3u8_formats(
                manifest_url, video_id, 'mp4'),
        }

View File

@ -3,6 +3,8 @@ from __future__ import unicode_literals
import re import re
import hashlib import hashlib
import universalmusicfrance
import ultimedia
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -15,6 +17,29 @@ class WatIE(InfoExtractor):
_VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html' _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
IE_NAME = 'wat.tv' IE_NAME = 'wat.tv'
_TESTS = [ _TESTS = [
{
'url': 'http://www.wat.tv/video/anna-bergendahl-for-you-2015-7dvjn_76lkz_.html',
'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc',
'info_dict': {
'id': '4555-for-you-remix-lyric-video',
'display_id': '4555-for-you-remix-lyric-video',
'ext': 'mp4',
'title': 'For You - Anna Bergendahl - Universal Music France',
'description': 'md5:1bbdde8d44751f43367ba68e8b9966a6'
},
},
{
'url': 'http://www.wat.tv/video/david-guetta-titanium-feat-sia-4v6p5_4v69t_.html',
'md5': '5c31a70358cd5019595297a26390cd46',
'info_dict': {
'id': 'qzkfx3',
'display_id': 'qzkfx3',
'ext': 'mp4',
'title': 'David Guetta - Titanium feat. Sia (Clip)',
'description': 'md5:bb28f8c4a84586e2eb1c3d092ab94f4b',
'upload_date': '20111220'
},
},
{ {
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
'md5': 'ce70e9223945ed26a8056d413ca55dc9', 'md5': 'ce70e9223945ed26a8056d413ca55dc9',
@ -57,81 +82,93 @@ class WatIE(InfoExtractor):
short_id = mobj.group('short_id') short_id = mobj.group('short_id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id or short_id) webpage = self._download_webpage(url, display_id or short_id)
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id') srcIFrame = self._html_search_regex(r'<iframe .* src="(.*?)(.iframe)?"', webpage, 'srcIFrame')
if (srcIFrame.__contains__("universalmusic")):
#return universalmusicfrance.UniversalMusicFranceIE()._real_extract(srcIFrame);
print srcIFrame
return self.url_result(srcIFrame)
elif (srcIFrame.__contains__("ultimedia")):
mobj = re.match(r'http://www\.ultimedia\.com/deliver/musique/iframe/mdtk/[0-9]*/zone/[0-9]/article/(?P<article_id>.*?)/.*', srcIFrame)
article_id = mobj.group('article_id')
ultimedia_url = "http://www.ultimedia.com/default/index/videomusic/id/" + article_id
#return ultimedia.UltimediaIE()._real_extract(ultimedia_url)
return self.url_result(ultimedia_url)
else:
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
video_info = self.download_video_info(real_id) video_info = self.download_video_info(real_id)
error_desc = video_info.get('error_desc') error_desc = video_info.get('error_desc')
if error_desc: if error_desc:
raise ExtractorError( raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
geo_list = video_info.get('geoList') geo_list = video_info.get('geoList')
country = geo_list[0] if geo_list else '' country = geo_list[0] if geo_list else ''
chapters = video_info['chapters'] chapters = video_info['chapters']
first_chapter = chapters[0] first_chapter = chapters[0]
files = video_info['files'] files = video_info['files']
first_file = files[0] first_file = files[0]
if real_id_for_chapter(first_chapter) != real_id: if real_id_for_chapter(first_chapter) != real_id:
self.to_screen('Multipart video detected') self.to_screen('Multipart video detected')
chapter_urls = [] chapter_urls = []
for chapter in chapters: for chapter in chapters:
chapter_id = real_id_for_chapter(chapter) chapter_id = real_id_for_chapter(chapter)
# Yes, when we this chapter is processed by WatIE, # Yes, when we this chapter is processed by WatIE,
# it will download the info again # it will download the info again
chapter_info = self.download_video_info(chapter_id) chapter_info = self.download_video_info(chapter_id)
chapter_urls.append(chapter_info['url']) chapter_urls.append(chapter_info['url'])
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls] entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
return self.playlist_result(entries, real_id, video_info['title']) return self.playlist_result(entries, real_id, video_info['title'])
upload_date = None upload_date = None
if 'date_diffusion' in first_chapter: if 'date_diffusion' in first_chapter:
upload_date = unified_strdate(first_chapter['date_diffusion']) upload_date = unified_strdate(first_chapter['date_diffusion'])
# Otherwise we can continue and extract just one part, we have to use # Otherwise we can continue and extract just one part, we have to use
# the short id for getting the video url # the short id for getting the video url
formats = [{ formats = [{
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
'format_id': 'Mobile', 'format_id': 'Mobile',
}] }]
fmts = [('SD', 'web')] fmts = [('SD', 'web')]
if first_file.get('hasHD'): if first_file.get('hasHD'):
fmts.append(('HD', 'webhd')) fmts.append(('HD', 'webhd'))
def compute_token(param): def compute_token(param):
timestamp = '%08x' % int(self._download_webpage( timestamp = '%08x' % int(self._download_webpage(
'http://www.wat.tv/servertime', real_id, 'http://www.wat.tv/servertime', real_id,
'Downloading server time').split('|')[0]) 'Downloading server time').split('|')[0])
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564' magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp) return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
for fmt in fmts: for fmt in fmts:
webid = '/%s/%s' % (fmt[1], real_id) webid = '/%s/%s' % (fmt[1], real_id)
video_url = self._download_webpage( video_url = self._download_webpage(
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country), 'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
real_id, real_id,
'Downloading %s video URL' % fmt[0], 'Downloading %s video URL' % fmt[0],
'Failed to download %s video URL' % fmt[0], 'Failed to download %s video URL' % fmt[0],
False) False)
if not video_url: if not video_url:
continue continue
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'ext': 'mp4', 'ext': 'mp4',
'format_id': fmt[0], 'format_id': fmt[0],
}) })
return { return {
'id': real_id, 'id': real_id,
'display_id': display_id, 'display_id': display_id,
'title': first_chapter['title'], 'title': first_chapter['title'],
'thumbnail': first_chapter['preview'], 'thumbnail': first_chapter['preview'],
'description': first_chapter['description'], 'description': first_chapter['description'],
'view_count': video_info['views'], 'view_count': video_info['views'],
'upload_date': upload_date, 'upload_date': upload_date,
'duration': first_file['duration'], 'duration': first_file['duration'],
'formats': formats, 'formats': formats,
} }