[steam] Add extractor for live broadcasts (#6012)
This commit is contained in:
parent
675a966176
commit
f9775ae86e
@ -1,9 +1,19 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
import xml.etree.ElementTree as etree
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import compat_urllib_request
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
xpath_with_ns,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DashSegmentsFD(FileDownloader):
|
class DashSegmentsFD(FileDownloader):
|
||||||
@ -13,9 +23,6 @@ class DashSegmentsFD(FileDownloader):
|
|||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
base_url = info_dict['url']
|
|
||||||
segment_urls = info_dict['segment_urls']
|
|
||||||
|
|
||||||
is_test = self.params.get('test', False)
|
is_test = self.params.get('test', False)
|
||||||
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||||
byte_counter = 0
|
byte_counter = 0
|
||||||
@ -34,21 +41,63 @@ class DashSegmentsFD(FileDownloader):
|
|||||||
outf.write(data)
|
outf.write(data)
|
||||||
return len(data)
|
return len(data)
|
||||||
|
|
||||||
def combine_url(base_url, target_url):
|
if not info_dict.get('is_live'):
|
||||||
if re.match(r'^https?://', target_url):
|
base_url = info_dict['url']
|
||||||
return target_url
|
segment_urls = info_dict['segment_urls']
|
||||||
return '%s/%s' % (base_url, target_url)
|
|
||||||
|
def combine_url(base_url, target_url):
|
||||||
|
if re.match(r'^https?://', target_url):
|
||||||
|
return target_url
|
||||||
|
return '%s/%s' % (base_url, target_url)
|
||||||
|
|
||||||
|
init_url = combine_url(base_url, info_dict['initialization_url'])
|
||||||
|
segment_urls = [combine_url(base_url, segment_url) for segment_url in segment_urls]
|
||||||
|
|
||||||
|
else:
|
||||||
|
manifest_url = info_dict['url']
|
||||||
|
manifest_xml = self.ydl.urlopen(manifest_url).read()
|
||||||
|
manifest = etree.fromstring(manifest_xml)
|
||||||
|
_x = lambda p: xpath_with_ns(p, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})
|
||||||
|
ad = [e for e in manifest.findall(_x('ns:Period/ns:AdaptationSet')) if e.attrib['id'] == info_dict['mpd_set_id']][0]
|
||||||
|
segment_template = ad.find(_x('ns:SegmentTemplate'))
|
||||||
|
|
||||||
|
def subs_url_template(url_template, repr_id, number=None):
    """Expand the DASH SegmentTemplate identifiers in url_template.

    Every '$RepresentationID$' is replaced with repr_id. When number is
    given, '$Number$' is replaced with that number; the width format tag
    form '$Number%0<width>d$' (e.g. '$Number%05d$') is also recognised and
    expands to the zero-padded number. When number is None any '$Number...$'
    identifier is left untouched, which is what the initialization segment
    URL needs.

    Returns the expanded URL string.
    """
    result = url_template.replace('$RepresentationID$', repr_id)
    if number is None:
        return result

    def _expand_number(mobj):
        width = mobj.group('width')
        if width is None:
            # Bare $Number$: plain string conversion of the number.
            return '%s' % (number,)
        # $Number%0<width>d$: zero-pad to the requested width.
        return '%0*d' % (int(width), number)

    # A function is used as replacement so the result is inserted literally,
    # without any backslash/group-reference processing.
    return re.sub(r'\$Number(?:%0(?P<width>\d+)d)?\$', _expand_number, result)
|
||||||
|
|
||||||
|
start_time = parse_iso8601(manifest.attrib['availabilityStartTime'])
|
||||||
|
segment_duration = (int(segment_template.attrib['duration']) / int(segment_template.attrib['timescale'])) # in seconds
|
||||||
|
first_segment = int((int(time.time()) - start_time) / segment_duration)
|
||||||
|
init_url = subs_url_template(segment_template.attrib['initialization'], '1')
|
||||||
|
|
||||||
|
def build_live_segment_urls():
    """Endlessly yield live media segment URLs, paced by wall-clock time.

    Numbering starts at first_segment and increases by one per yielded
    URL; the generator never finishes on its own. Each URL is built
    from the segment template's 'media' attribute with the
    representation id fixed to '1'.
    """
    segment_number = first_segment
    while True:
        # A segment must not be requested before its start time, so
        # sleep until the moment this segment is expected to begin.
        due_at = start_time + segment_number * segment_duration
        delay = due_at - time.time()
        if delay > 0:
            time.sleep(delay)
        yield subs_url_template(
            segment_template.attrib['media'], '1', segment_number)
        segment_number += 1
|
||||||
|
segment_urls = build_live_segment_urls()
|
||||||
|
|
||||||
with open(tmpfilename, 'wb') as outf:
|
with open(tmpfilename, 'wb') as outf:
|
||||||
append_url_to_file(
|
append_url_to_file(
|
||||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
outf, init_url,
|
||||||
'initialization segment')
|
'initialization segment')
|
||||||
for i, segment_url in enumerate(segment_urls):
|
for i, segment_url in enumerate(segment_urls):
|
||||||
|
note = 'segment %d' % (i + 1)
|
||||||
|
if not info_dict.get('is_live'):
|
||||||
|
note += ' / %d' % len(segment_urls)
|
||||||
segment_len = append_url_to_file(
|
segment_len = append_url_to_file(
|
||||||
outf, combine_url(base_url, segment_url),
|
outf, segment_url, note, remaining_bytes)
|
||||||
'segment %d / %d' % (i + 1, len(segment_urls)),
|
|
||||||
remaining_bytes)
|
|
||||||
byte_counter += segment_len
|
byte_counter += segment_len
|
||||||
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'filename': filename,
|
||||||
|
})
|
||||||
if remaining_bytes is not None:
|
if remaining_bytes is not None:
|
||||||
remaining_bytes -= segment_len
|
remaining_bytes -= segment_len
|
||||||
if remaining_bytes <= 0:
|
if remaining_bytes <= 0:
|
||||||
|
@ -561,7 +561,10 @@ from .srf import SrfIE
|
|||||||
from .srmediathek import SRMediathekIE
|
from .srmediathek import SRMediathekIE
|
||||||
from .ssa import SSAIE
|
from .ssa import SSAIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import (
|
||||||
|
SteamIE,
|
||||||
|
SteamBroadcastsIE,
|
||||||
|
)
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
from .streetvoice import StreetVoiceIE
|
from .streetvoice import StreetVoiceIE
|
||||||
|
@ -5,7 +5,9 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -121,3 +123,76 @@ class SteamIE(InfoExtractor):
|
|||||||
raise ExtractorError('Could not find any videos')
|
raise ExtractorError('Could not find any videos')
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id, playlist_title)
|
return self.playlist_result(videos, playlist_id, playlist_title)
|
||||||
|
|
||||||
|
|
||||||
|
class SteamBroadcastsIE(InfoExtractor):
    """Extractor for live broadcasts on steamcommunity.com and dota2.com."""

    IE_DESC = 'Steam and Dota 2 live broadcasts'
    _VALID_URL = r'https?://(?:www\.)?(?:steamcommunity\.com/broadcast|dota2\.com)/watch/(?P<id>\d+)'

    # Broadcasts are live-only; current test URLs can be obtained from
    # https://steamcommunity.com/?subsection=broadcasts or
    # https://www.dota2.com/watch/
    _TESTS = [
        {
            'url': 'http://www.dota2.com/watch/76561197986987526',
            'only_matching': True,
        },
        {
            'url': 'https://steamcommunity.com/broadcast/watch/76561197986987526',
            'only_matching': True,
        },
    ]

    def _extract_dash_manifest_formats(self, manifest_url, video_id):
        """Build one format dict per Representation in the DASH manifest.

        The manifest is downloaded from manifest_url. The adaptation set
        whose id is 'game' is skipped. Representations in the 'audio' set
        become audio-only m4a formats with preference -10; all others
        become video-only mp4 formats with preference 0. Every format
        carries the manifest URL as its 'url' together with the set and
        representation ids.

        Returns the list of format dicts in manifest order.
        """
        def _ns(path):
            return xpath_with_ns(path, {'ns': 'urn:mpeg:DASH:schema:MPD:2011'})

        manifest = self._download_xml(manifest_url, video_id)

        formats = []
        for adaptation_set in manifest.findall(_ns('ns:Period/ns:AdaptationSet')):
            set_id = adaptation_set.attrib['id']
            if set_id == 'game':
                continue
            is_audio = set_id == 'audio'
            for representation in adaptation_set.findall(_ns('ns:Representation')):
                attrs = representation.attrib
                repr_id = attrs['id']
                codecs = attrs.get('codecs')
                formats.append({
                    'url': manifest_url,
                    'ext': 'm4a' if is_audio else 'mp4',
                    'format_id': '{0}-{1}'.format(set_id, repr_id),
                    'protocol': 'http_dash_segments',
                    'mpd_set_id': set_id,
                    'mpd_representation_id': repr_id,
                    'height': int_or_none(attrs.get('height')),
                    'width': int_or_none(attrs.get('width')),
                    'vcodec': 'none' if is_audio else codecs,
                    'acodec': codecs if is_audio else 'none',
                    'preference': -10 if is_audio else 0,
                })
        return formats

    def _real_extract(self, url):
        """Return the info dict for the live broadcast of the Steam id in url.

        Two JSON endpoints are queried: getbroadcastmpd (with broadcastid=0)
        supplies the broadcast id and the manifest URL, and getbroadcastinfo
        supplies the title.
        """
        steamid = self._match_id(url)

        mpd_info_url = 'https://steamcommunity.com/broadcast/getbroadcastmpd/?steamid={0}&broadcastid=0'.format(steamid)
        broadcast_mpd_info = self._download_json(mpd_info_url, steamid)

        info_url = 'https://steamcommunity.com/broadcast/getbroadcastinfo/?steamid={0}&broadcastid={1}'.format(
            steamid, broadcast_mpd_info['broadcastid'])
        broadcast_info = self._download_json(info_url, steamid)

        formats = self._extract_dash_manifest_formats(
            broadcast_mpd_info['url'], steamid)
        self._sort_formats(formats)

        return {
            'id': steamid,
            'title': broadcast_info['title'],
            'formats': formats,
            'is_live': True,
        }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user