[steam] Add extractor for live broadcasts (#6012)

This commit is contained in:
Jaime Marquínez Ferrándiz 2015-07-21 19:24:20 +02:00
parent 675a966176
commit f9775ae86e
3 changed files with 140 additions and 13 deletions

View File

@@ -1,9 +1,19 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
import time
import xml.etree.ElementTree as etree
from .common import FileDownloader from .common import FileDownloader
from ..compat import compat_urllib_request from ..compat import (
compat_str,
compat_urllib_request,
)
from ..utils import (
parse_iso8601,
xpath_with_ns,
)
class DashSegmentsFD(FileDownloader): class DashSegmentsFD(FileDownloader):
@@ -13,9 +23,6 @@ class DashSegmentsFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
base_url = info_dict['url']
segment_urls = info_dict['segment_urls']
is_test = self.params.get('test', False) is_test = self.params.get('test', False)
remaining_bytes = self._TEST_FILE_SIZE if is_test else None remaining_bytes = self._TEST_FILE_SIZE if is_test else None
byte_counter = 0 byte_counter = 0
@@ -34,21 +41,63 @@ class DashSegmentsFD(FileDownloader):
outf.write(data) outf.write(data)
return len(data) return len(data)
def combine_url(base_url, target_url): if not info_dict.get('is_live'):
if re.match(r'^https?://', target_url): base_url = info_dict['url']
return target_url segment_urls = info_dict['segment_urls']
return '%s/%s' % (base_url, target_url)
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s/%s' % (base_url, target_url)
init_url = combine_url(base_url, info_dict['initialization_url'])
segment_urls = [combine_url(base_url, segment_url) for segment_url in segment_urls]
else:
manifest_url = info_dict['url']
manifest_xml = self.ydl.urlopen(manifest_url).read()
manifest = etree.fromstring(manifest_xml)
_x = lambda p: xpath_with_ns(p, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})
ad = [e for e in manifest.findall(_x('ns:Period/ns:AdaptationSet')) if e.attrib['id'] == info_dict['mpd_set_id']][0]
segment_template = ad.find(_x('ns:SegmentTemplate'))
def subs_url_template(url_template, repr_id, number=None):
result = url_template.replace('$RepresentationID$', repr_id)
if number is not None:
result = result.replace('$Number$', compat_str(number))
return result
start_time = parse_iso8601(manifest.attrib['availabilityStartTime'])
segment_duration = (int(segment_template.attrib['duration']) / int(segment_template.attrib['timescale'])) # in seconds
first_segment = int((int(time.time()) - start_time) / segment_duration)
init_url = subs_url_template(segment_template.attrib['initialization'], '1')
def build_live_segment_urls():
for nr in itertools.count(first_segment):
# We have to avoid requesting a segment before its start time
expected_time = start_time + nr * segment_duration
wait_time = expected_time - time.time()
if wait_time > 0:
time.sleep(wait_time)
yield subs_url_template(segment_template.attrib['media'], '1', nr)
segment_urls = build_live_segment_urls()
with open(tmpfilename, 'wb') as outf: with open(tmpfilename, 'wb') as outf:
append_url_to_file( append_url_to_file(
outf, combine_url(base_url, info_dict['initialization_url']), outf, init_url,
'initialization segment') 'initialization segment')
for i, segment_url in enumerate(segment_urls): for i, segment_url in enumerate(segment_urls):
note = 'segment %d' % (i + 1)
if not info_dict.get('is_live'):
note += ' / %d' % len(segment_urls)
segment_len = append_url_to_file( segment_len = append_url_to_file(
outf, combine_url(base_url, segment_url), outf, segment_url, note, remaining_bytes)
'segment %d / %d' % (i + 1, len(segment_urls)),
remaining_bytes)
byte_counter += segment_len byte_counter += segment_len
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'filename': filename,
})
if remaining_bytes is not None: if remaining_bytes is not None:
remaining_bytes -= segment_len remaining_bytes -= segment_len
if remaining_bytes <= 0: if remaining_bytes <= 0:

View File

@@ -561,7 +561,10 @@ from .srf import SrfIE
from .srmediathek import SRMediathekIE from .srmediathek import SRMediathekIE
from .ssa import SSAIE from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE from .steam import (
SteamIE,
SteamBroadcastsIE,
)
from .streamcloud import StreamcloudIE from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE from .streetvoice import StreetVoiceIE

View File

@@ -5,7 +5,9 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
unescapeHTML, unescapeHTML,
xpath_with_ns,
) )
@@ -121,3 +123,76 @@ class SteamIE(InfoExtractor):
raise ExtractorError('Could not find any videos') raise ExtractorError('Could not find any videos')
return self.playlist_result(videos, playlist_id, playlist_title) return self.playlist_result(videos, playlist_id, playlist_title)
class SteamBroadcastsIE(InfoExtractor):
    """Extractor for live broadcasts on steamcommunity.com and dota2.com.

    The numeric id in the watch URL is used as the video id.  The DASH
    manifest URL and the broadcast title are obtained from two JSON
    endpoints on steamcommunity.com; every result is flagged as live.
    """

    IE_DESC = 'Steam and Dota 2 live broadcasts'
    _VALID_URL = r'https?://(?:www\.)?(?:steamcommunity\.com/broadcast|dota2\.com)/watch/(?P<id>\d+)'
    # Only livestreams, test urls can be obtained from
    # https://steamcommunity.com/?subsection=broadcasts or
    # https://www.dota2.com/watch/
    _TESTS = [
        {
            'url': 'http://www.dota2.com/watch/76561197986987526',
            'only_matching': True,
        },
        {
            'url': 'https://steamcommunity.com/broadcast/watch/76561197986987526',
            'only_matching': True,
        },
    ]

    def _extract_dash_manifest_formats(self, manifest_url, video_id):
        """Build one format dict per Representation found in a DASH manifest.

        manifest_url -- URL of the MPD document; it is also stored as the
                        'url' of every returned format.
        video_id     -- id handed to _download_xml for the manifest request.

        Returns a list of format dicts using the 'http_dash_segments'
        protocol.  Each one records the AdaptationSet id ('mpd_set_id') and
        the Representation id ('mpd_representation_id') it was built from.
        AdaptationSets whose id is 'game' are skipped.

        Raises KeyError if an AdaptationSet or Representation element has
        no 'id' attribute.
        """
        manifest = self._download_xml(manifest_url, video_id)

        def _x(path):
            # Qualify an XPath expression with the MPD namespace.
            return xpath_with_ns(path, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})

        formats = []
        for ad_set in manifest.findall(_x('ns:Period/ns:AdaptationSet')):
            set_id = ad_set.attrib['id']
            if set_id == 'game':
                continue
            is_audio = set_id == 'audio'
            # Named 'representation' so the builtin repr() is not shadowed.
            for representation in ad_set.findall(_x('ns:Representation')):
                repr_id = representation.attrib['id']
                codecs = representation.attrib.get('codecs')
                if is_audio:
                    ext = 'm4a'
                    vcodec = 'none'
                    acodec = codecs
                    # Audio-only representations get a lower preference
                    # than the others.
                    preference = -10
                else:
                    ext = 'mp4'
                    vcodec = codecs
                    acodec = 'none'
                    preference = 0
                formats.append({
                    'url': manifest_url,
                    'ext': ext,
                    'format_id': '{0}-{1}'.format(set_id, repr_id),
                    'protocol': 'http_dash_segments',
                    'mpd_set_id': set_id,
                    'mpd_representation_id': repr_id,
                    'height': int_or_none(representation.attrib.get('height')),
                    'width': int_or_none(representation.attrib.get('width')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                    'preference': preference,
                })
        return formats

    def _real_extract(self, url):
        """Return the info dict for the live broadcast at *url*.

        Raises KeyError if the JSON responses lack the 'broadcastid', 'url'
        or 'title' keys read below.
        """
        steamid = self._match_id(url)

        # The first request passes broadcastid=0; the response supplies the
        # actual broadcast id together with the manifest URL.
        broadcast_mpd_info = self._download_json(
            'https://steamcommunity.com/broadcast/getbroadcastmpd/?steamid={0}&broadcastid=0'.format(steamid),
            steamid)
        broadcast_id = broadcast_mpd_info['broadcastid']

        # The second request is only used for the broadcast title.
        broadcast_info = self._download_json(
            'https://steamcommunity.com/broadcast/getbroadcastinfo/?steamid={0}&broadcastid={1}'.format(steamid, broadcast_id),
            steamid)

        manifest_url = broadcast_mpd_info['url']
        formats = self._extract_dash_manifest_formats(manifest_url, steamid)
        self._sort_formats(formats)

        return {
            'id': steamid,
            'title': broadcast_info['title'],
            'formats': formats,
            'is_live': True,
        }