[Brightcove] support bostonglobe.com's use

BrightcoveNewIE.extract_urls(): Handle pages where all of the player data
is carried in attributes of the <video> tag and the video id attribute is
data-brightcove-video-id rather than data-video-id.

Add a test to the generic extractor. Note that HEAD requests return 404, and no checksum is computed(?).
This commit is contained in:
John Hawkinson 2017-02-12 08:26:36 -05:00
parent 459818e280
commit b096e61098
2 changed files with 38 additions and 1 deletions

View File

@ -508,7 +508,22 @@ class BrightcoveNewIE(InfoExtractor):
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
return entries
# <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
for video_id, account_id, player_id, embed in re.findall(
r'''(?sx)
<video[^>]+
data-brightcove-video-id=["\'](\d+|ref:[^"\']+)["\'].*?
data-account=["\'](\d+)["\'].*?
data-player=["\'](\w+)["\'].*?
data-embed=["\'](\w+)["\'].*?
</video>
''', webpage):
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
return entries
def _real_extract(self, url):
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()

View File

@ -447,6 +447,28 @@ class GenericIE(InfoExtractor):
},
}],
},
{
# Brightcove with metadata on one line
'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html?',
'info_dict': {
'id': 'story',
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood - The Boston Globe',
},
'playlist': [{
'info_dict': {
'id': '5320421710001',
'ext': 'mp4',
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
'timestamp': 1486877593,
'upload_date': '20170212',
'uploader_id': '245991542',
},
}],
# HEAD requests produce 404 :(
'expected_warnings': ['404'],
},
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',