[steam] Add extractor for live broadcasts (#6012)

This commit is contained in:
Jaime Marquínez Ferrándiz 2015-07-21 19:24:20 +02:00
parent 675a966176
commit f9775ae86e
3 changed files with 140 additions and 13 deletions

View File

@ -1,9 +1,19 @@
from __future__ import unicode_literals
import itertools
import re
import time
import xml.etree.ElementTree as etree
from .common import FileDownloader
from ..compat import compat_urllib_request
from ..compat import (
compat_str,
compat_urllib_request,
)
from ..utils import (
parse_iso8601,
xpath_with_ns,
)
class DashSegmentsFD(FileDownloader):
@ -13,9 +23,6 @@ class DashSegmentsFD(FileDownloader):
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
base_url = info_dict['url']
segment_urls = info_dict['segment_urls']
is_test = self.params.get('test', False)
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
byte_counter = 0
@ -34,21 +41,63 @@ class DashSegmentsFD(FileDownloader):
outf.write(data)
return len(data)
def combine_url(base_url, target_url):
    """Return the URL to request for a segment.

    A target that already is an absolute http(s) URL is returned as is;
    any other target is appended to base_url after a '/' separator.
    """
    is_absolute = re.match(r'^https?://', target_url) is not None
    return target_url if is_absolute else '%s/%s' % (base_url, target_url)
if not info_dict.get('is_live'):
base_url = info_dict['url']
segment_urls = info_dict['segment_urls']
def combine_url(base_url, target_url):
    """Resolve target_url against base_url.

    Relative targets are joined to base_url with a '/'; targets that
    start with an http:// or https:// scheme are passed through unchanged.
    """
    if not re.match(r'^https?://', target_url):
        return '%s/%s' % (base_url, target_url)
    return target_url
init_url = combine_url(base_url, info_dict['initialization_url'])
segment_urls = [combine_url(base_url, segment_url) for segment_url in segment_urls]
else:
manifest_url = info_dict['url']
manifest_xml = self.ydl.urlopen(manifest_url).read()
manifest = etree.fromstring(manifest_xml)
_x = lambda p: xpath_with_ns(p, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})
ad = [e for e in manifest.findall(_x('ns:Period/ns:AdaptationSet')) if e.attrib['id'] == info_dict['mpd_set_id']][0]
segment_template = ad.find(_x('ns:SegmentTemplate'))
def subs_url_template(url_template, repr_id, number=None):
    """Fill in the placeholders of a DASH SegmentTemplate URL.

    '$RepresentationID$' is always replaced by repr_id.  '$Number$' is
    replaced by the text form of number only when number is given;
    otherwise that placeholder is left in the result.
    """
    expanded = url_template.replace('$RepresentationID$', repr_id)
    if number is None:
        return expanded
    return expanded.replace('$Number$', compat_str(number))
start_time = parse_iso8601(manifest.attrib['availabilityStartTime'])
segment_duration = (int(segment_template.attrib['duration']) / int(segment_template.attrib['timescale'])) # in seconds
first_segment = int((int(time.time()) - start_time) / segment_duration)
init_url = subs_url_template(segment_template.attrib['initialization'], '1')
def build_live_segment_urls():
    """Yield media segment URLs for the live stream, without end.

    Numbering starts at first_segment and increases by one per URL.
    Before a URL is yielded the generator sleeps until the segment's
    expected start time (start_time + number * segment_duration).
    """
    segment_number = first_segment
    while True:
        # We have to avoid requesting a segment before its start time
        delay = (start_time + segment_number * segment_duration) - time.time()
        if delay > 0:
            time.sleep(delay)
        # NOTE(review): the representation id is hard-coded to '1' here,
        # matching the initialization URL built by the enclosing code.
        yield subs_url_template(segment_template.attrib['media'], '1', segment_number)
        segment_number += 1
segment_urls = build_live_segment_urls()
with open(tmpfilename, 'wb') as outf:
append_url_to_file(
outf, combine_url(base_url, info_dict['initialization_url']),
outf, init_url,
'initialization segment')
for i, segment_url in enumerate(segment_urls):
note = 'segment %d' % (i + 1)
if not info_dict.get('is_live'):
note += ' / %d' % len(segment_urls)
segment_len = append_url_to_file(
outf, combine_url(base_url, segment_url),
'segment %d / %d' % (i + 1, len(segment_urls)),
remaining_bytes)
outf, segment_url, note, remaining_bytes)
byte_counter += segment_len
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'filename': filename,
})
if remaining_bytes is not None:
remaining_bytes -= segment_len
if remaining_bytes <= 0:

View File

@ -561,7 +561,10 @@ from .srf import SrfIE
from .srmediathek import SRMediathekIE
from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .steam import (
SteamIE,
SteamBroadcastsIE,
)
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE

View File

@ -5,7 +5,9 @@ import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
unescapeHTML,
xpath_with_ns,
)
@ -121,3 +123,76 @@ class SteamIE(InfoExtractor):
raise ExtractorError('Could not find any videos')
return self.playlist_result(videos, playlist_id, playlist_title)
class SteamBroadcastsIE(InfoExtractor):
    """Extractor for live broadcasts on steamcommunity.com and dota2.com.

    The broadcast is described by a DASH manifest; every representation
    in it becomes one format whose 'url' is the manifest itself and whose
    'protocol' is 'http_dash_segments'.
    """
    IE_DESC = 'Steam and Dota 2 live broadcasts'
    _VALID_URL = r'https?://(?:www\.)?(?:steamcommunity\.com/broadcast|dota2\.com)/watch/(?P<id>\d+)'
    # Only livestreams, test urls can be obtained from
    # https://steamcommunity.com/?subsection=broadcasts or
    # https://www.dota2.com/watch/
    _TESTS = [
        {
            'url': 'http://www.dota2.com/watch/76561197986987526',
            'only_matching': True,
        },
        {
            'url': 'https://steamcommunity.com/broadcast/watch/76561197986987526',
            'only_matching': True,
        },
    ]

    def _extract_dash_manifest_formats(self, manifest_url, video_id):
        """Download the MPD at manifest_url and return a list of formats.

        One format dict is built per Representation of every
        AdaptationSet, except the set whose id is 'game' and sets that
        carry no id at all.  Representations of the 'audio' set are
        audio-only (lower preference); all others are video-only.
        """
        manifest = self._download_xml(manifest_url, video_id)

        def _x(path):
            # Qualify the path with the MPD namespace.
            return xpath_with_ns(path, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})

        formats = []
        for ad_set in manifest.findall(_x('ns:Period/ns:AdaptationSet')):
            set_id = ad_set.attrib.get('id')
            # A set without an id could not be addressed through
            # 'mpd_set_id' later, so it is skipped instead of raising
            # KeyError.  The 'game' set is not exposed as a format.
            if set_id is None or set_id == 'game':
                continue
            for representation in ad_set.findall(_x('ns:Representation')):
                repr_id = representation.attrib['id']
                codecs = representation.attrib.get('codecs')
                if set_id == 'audio':
                    ext = 'm4a'
                    vcodec = 'none'
                    acodec = codecs
                    preference = -10
                else:
                    ext = 'mp4'
                    vcodec = codecs
                    acodec = 'none'
                    preference = 0
                formats.append({
                    'url': manifest_url,
                    'ext': ext,
                    'format_id': '{0}-{1}'.format(set_id, repr_id),
                    'protocol': 'http_dash_segments',
                    'mpd_set_id': set_id,
                    'mpd_representation_id': repr_id,
                    'height': int_or_none(representation.attrib.get('height')),
                    'width': int_or_none(representation.attrib.get('width')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                    'preference': preference,
                })
        return formats

    def _real_extract(self, url):
        """Return the info dict for the broadcast of the steam id in url.

        Raises ExtractorError when the MPD info response carries no
        manifest URL or broadcast id.
        """
        steamid = self._match_id(url)

        broadcast_mpd_info = self._download_json(
            'https://steamcommunity.com/broadcast/getbroadcastmpd/?steamid={0}&broadcastid=0'.format(steamid),
            steamid)
        manifest_url = broadcast_mpd_info.get('url')
        broadcast_id = broadcast_mpd_info.get('broadcastid')
        if not manifest_url or broadcast_id is None:
            # Presumably the user is not broadcasting at the moment;
            # report it as an extraction error rather than a KeyError.
            raise ExtractorError(
                'Unable to find the broadcast manifest (the broadcast may not be live)',
                expected=True)

        broadcast_info = self._download_json(
            'https://steamcommunity.com/broadcast/getbroadcastinfo/?steamid={0}&broadcastid={1}'.format(steamid, broadcast_id),
            steamid)

        formats = self._extract_dash_manifest_formats(manifest_url, steamid)
        self._sort_formats(formats)

        return {
            'id': steamid,
            'title': broadcast_info['title'],
            'formats': formats,
            'is_live': True,
        }