Merge branch 'master' into GoogleDrive-issue-13619

This commit is contained in:
Parmjit Virk 2017-08-04 19:47:05 -05:00
commit 9cfc81df17
12 changed files with 123 additions and 93 deletions

View File

@ -584,7 +584,7 @@ If you are using an output template inside a Windows batch file then you must es
#### Output template examples #### Output template examples
Note on Windows you may need to use double quotes instead of single. Note that on Windows you may need to use double quotes instead of single.
```bash ```bash
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@ -671,7 +671,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2
#### Format selection examples #### Format selection examples
Note on Windows you may need to use double quotes instead of single. Note that on Windows you may need to use double quotes instead of single.
```bash ```bash
# Download best mp4 format available or any other best if no mp4 available # Download best mp4 format available or any other best if no mp4 available

View File

@ -371,6 +371,19 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'format': 'best[height>360]'}) ydl = YDL({'format': 'best[height>360]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_format_selection_issue_10083(self):
    """Regression test for format selection with a filtered fallback.

    The only combined format is 360p, so ``best[height>360]`` matches
    nothing and the selector after the slash is expected to merge the
    720p video-only format with the audio-only one.
    """
    # See https://github.com/rg3/youtube-dl/issues/10083
    combined = {'format_id': 'regular', 'height': 360, 'url': TEST_URL}
    video_only = {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}
    audio_only = {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}
    info_dict = _make_result([combined, video_only, audio_only])

    format_spec = 'best[height>360]/bestvideo[height>360]+bestaudio'
    ydl = YDL({'format': format_spec})
    ydl.process_ie_result(info_dict.copy())

    chosen = ydl.downloaded_info_dicts[0]
    self.assertEqual(chosen['format_id'], 'video+audio')
def test_invalid_format_specs(self): def test_invalid_format_specs(self):
def assert_syntax_error(format_spec): def assert_syntax_error(format_spec):
ydl = YDL({'format': format_spec}) ydl = YDL({'format': format_spec})

View File

@ -2,6 +2,7 @@ from __future__ import unicode_literals
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import compat_urllib_error
from ..utils import urljoin
class DashSegmentsFD(FragmentFD): class DashSegmentsFD(FragmentFD):
@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments' FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get( fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments'] 'test', False) else info_dict['fragments']
ctx = { ctx = {
'filename': filename, 'filename': filename,
'total_frags': len(segments), 'total_frags': len(fragments),
} }
self._prepare_and_start_frag_download(ctx) self._prepare_and_start_frag_download(ctx)
@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0 frag_index = 0
for i, segment in enumerate(segments): for i, fragment in enumerate(fragments):
frag_index += 1 frag_index += 1
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success: if not success:
return False return False
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)

View File

@ -1892,9 +1892,13 @@ class InfoExtractor(object):
'Bandwidth': bandwidth, 'Bandwidth': bandwidth,
} }
def location_key(location):
    """Return the fragment dict key under which ``location`` is stored.

    Locations beginning with ``http://`` or ``https://`` are stored as
    ``'url'``; everything else is stored as ``'path'``.
    """
    if re.match(r'^https?://', location):
        return 'url'
    return 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
media_location_key = location_key(media_template)
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time # can't be used at the same time
@ -1904,7 +1908,7 @@ class InfoExtractor(object):
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{ representation_ms_info['fragments'] = [{
'url': media_template % { media_location_key: media_template % {
'Number': segment_number, 'Number': segment_number,
'Bandwidth': bandwidth, 'Bandwidth': bandwidth,
}, },
@ -1928,7 +1932,7 @@ class InfoExtractor(object):
'Number': segment_number, 'Number': segment_number,
} }
representation_ms_info['fragments'].append({ representation_ms_info['fragments'].append({
'url': segment_url, media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']), 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
}) })
@ -1952,8 +1956,9 @@ class InfoExtractor(object):
for s in representation_ms_info['s']: for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale) duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1): for r in range(s.get('r', 0) + 1):
segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({ fragments.append({
'url': representation_ms_info['segment_urls'][segment_index], location_key(segment_uri): segment_uri,
'duration': duration, 'duration': duration,
}) })
segment_index += 1 segment_index += 1
@ -1962,6 +1967,7 @@ class InfoExtractor(object):
# No fragments key is present in this case. # No fragments key is present in this case.
if 'fragments' in representation_ms_info: if 'fragments' in representation_ms_info:
f.update({ f.update({
'fragment_base_url': base_url,
'fragments': [], 'fragments': [],
'protocol': 'http_dash_segments', 'protocol': 'http_dash_segments',
}) })
@ -1969,10 +1975,8 @@ class InfoExtractor(object):
initialization_url = representation_ms_info['initialization_url'] initialization_url = representation_ms_info['initialization_url']
if not f.get('url'): if not f.get('url'):
f['url'] = initialization_url f['url'] = initialization_url
f['fragments'].append({'url': initialization_url}) f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments']) f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
fragment['url'] = urljoin(base_url, fragment['url'])
try: try:
existing_format = next( existing_format = next(
fo for fo in formats fo for fo in formats

View File

@ -994,7 +994,6 @@ from .teachertube import (
) )
from .teachingchannel import TeachingChannelIE from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tele13 import Tele13IE from .tele13 import Tele13IE

View File

@ -15,7 +15,7 @@ class MLBIE(InfoExtractor):
(?:[\da-z_-]+\.)*mlb\.com/ (?:[\da-z_-]+\.)*mlb\.com/
(?: (?:
(?: (?:
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v| (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
(?: (?:
shared/video/embed/(?:embed|m-internal-embed)\.html| shared/video/embed/(?:embed|m-internal-embed)\.html|
(?:[^/]+/)+(?:play|index)\.jsp| (?:[^/]+/)+(?:play|index)\.jsp|
@ -84,7 +84,7 @@ class MLBIE(InfoExtractor):
}, },
{ {
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
'md5': 'b190e70141fb9a1552a85426b4da1b5d', 'md5': 'aafaf5b0186fee8f32f20508092f8111',
'info_dict': { 'info_dict': {
'id': '75609783', 'id': '75609783',
'ext': 'mp4', 'ext': 'mp4',
@ -94,6 +94,10 @@ class MLBIE(InfoExtractor):
'upload_date': '20150415', 'upload_date': '20150415',
} }
}, },
{
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
'only_matching': True,
},
{ {
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', 'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
'only_matching': True, 'only_matching': True,

View File

@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
# Direct video URL # Direct video URL
(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? | (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
# Article with embedded player (or direct video) # Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player # Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
) )
@ -345,6 +345,21 @@ class PBSIE(InfoExtractor):
'formats': 'mincount:8', 'formats': 'mincount:8',
}, },
}, },
{
# https://github.com/rg3/youtube-dl/issues/13801
'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
'info_dict': {
'id': '3003333873',
'ext': 'mp4',
'title': 'PBS NewsHour - full episode July 31, 2017',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'duration': 3265,
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
},
},
{ {
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True, 'only_matching': True,
@ -433,6 +448,9 @@ class PBSIE(InfoExtractor):
if url: if url:
break break
if not url:
url = self._og_search_url(webpage)
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id') player_id = mobj.group('player_id')

View File

@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title') r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
sources = self._parse_json(js_to_json(self._search_regex( sources = self._parse_json(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
webpage, 'sources', default='{}')), video_id) webpage, 'sources', default='{}')), video_id)
if not sources: if not sources:

View File

@ -1,48 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import unified_strdate
class TeamFourStarIE(InfoExtractor):
    """Extractor for teamfourstar.com pages.

    Page metadata is scraped from the HTML, while the media URL is taken
    from whatever ``JWPlatformIE._extract_url`` finds in the page.
    """

    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
    _TEST = {
        'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
        'info_dict': {
            'id': '0WdZO31W',
            'title': 'TFS Abridged Parody Episode 1',
            'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
            'ext': 'mp4',
            'timestamp': 1394168400,
            'upload_date': '20080508',
        },
    }

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the page at ``url``.

        The title lookup uses the helper's default (no ``fatal=False``);
        the date and description lookups are explicitly non-fatal.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Presumably the embedded JW Platform player URL -- confirm
        # against JWPlatformIE._extract_url.
        embed_url = JWPlatformIE._extract_url(page)

        title = self._html_search_regex(
            r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
            page, 'title')

        raw_date = self._html_search_regex(
            r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
            page, 'date', fatal=False)
        upload_date = unified_strdate(raw_date)

        description = self._html_search_regex(
            r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
            page, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(page)

        return {
            '_type': 'url_transparent',
            'display_id': slug,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'url': embed_url,
        }

View File

@ -15,6 +15,7 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
js_to_json,
sanitized_Request, sanitized_Request,
unescapeHTML, unescapeHTML,
urlencode_postdata, urlencode_postdata,
@ -268,6 +269,25 @@ class UdemyIE(InfoExtractor):
f = add_output_format_meta(f, format_id) f = add_output_format_meta(f, format_id)
formats.append(f) formats.append(f)
def extract_subtitles(track_list):
    """Sort caption tracks from ``track_list`` into the enclosing dicts.

    Anything that is not a list is ignored. Within the list, only dict
    entries with ``kind == 'captions'`` and a non-empty string ``src``
    are used. Tracks whose ``autogenerated`` value is exactly ``True``
    are appended to ``automatic_captions``; all others to ``subtitles``.
    """
    if not isinstance(track_list, list):
        return
    for entry in track_list:
        if not (isinstance(entry, dict) and entry.get('kind') == 'captions'):
            continue
        source = entry.get('src')
        if not (source and isinstance(source, compat_str)):
            continue
        # First non-empty of language / srclang / label names the bucket.
        language = (
            entry.get('language')
            or entry.get('srclang')
            or entry.get('label'))
        if entry.get('autogenerated') is True:
            target = automatic_captions
        else:
            target = subtitles
        target.setdefault(language, []).append({
            'url': source,
        })
download_urls = asset.get('download_urls') download_urls = asset.get('download_urls')
if isinstance(download_urls, dict): if isinstance(download_urls, dict):
extract_formats(download_urls.get('Video')) extract_formats(download_urls.get('Video'))
@ -315,23 +335,16 @@ class UdemyIE(InfoExtractor):
extract_formats(data.get('sources')) extract_formats(data.get('sources'))
if not duration: if not duration:
duration = int_or_none(data.get('duration')) duration = int_or_none(data.get('duration'))
tracks = data.get('tracks') extract_subtitles(data.get('tracks'))
if isinstance(tracks, list):
for track in tracks: if not subtitles and not automatic_captions:
if not isinstance(track, dict): text_tracks = self._parse_json(
continue self._search_regex(
if track.get('kind') != 'captions': r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
continue 'text tracks', default='{}', group='data'), video_id,
src = track.get('src') transform_source=lambda s: js_to_json(unescapeHTML(s)),
if not src or not isinstance(src, compat_str): fatal=False)
continue extract_subtitles(text_tracks)
lang = track.get('language') or track.get(
'srclang') or track.get('label')
sub_dict = automatic_captions if track.get(
'autogenerated') is True else subtitles
sub_dict.setdefault(lang, []).append({
'url': src,
})
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))

View File

@ -3,7 +3,10 @@ from __future__ import unicode_literals
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -161,13 +164,28 @@ class VidmeIE(InfoExtractor):
'or for violating the terms of use.', 'or for violating the terms of use.',
expected=True) expected=True)
formats = [{ formats = []
'format_id': f.get('type'), for f in video.get('formats', []):
'url': f['uri'], format_url = f.get('uri')
'width': int_or_none(f.get('width')), if not format_url or not isinstance(format_url, compat_str):
'height': int_or_none(f.get('height')), continue
'preference': 0 if f.get('type', '').endswith('clip') else 1, format_type = f.get('type')
} for f in video.get('formats', []) if f.get('uri')] if format_type == 'dash':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
elif format_type == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': f.get('type'),
'url': format_url,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'preference': 0 if f.get('type', '').endswith(
'clip') else 1,
})
if not formats and video.get('complete_url'): if not formats and video.get('complete_url'):
formats.append({ formats.append({

View File

@ -13,9 +13,9 @@ from ..utils import (
class YandexDiskIE(InfoExtractor): class YandexDiskIE(InfoExtractor):
_VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
'md5': '33955d7ae052f15853dc41f35f17581c', 'md5': '33955d7ae052f15853dc41f35f17581c',
'info_dict': { 'info_dict': {
@ -27,7 +27,10 @@ class YandexDiskIE(InfoExtractor):
'uploader_id': '300043621', 'uploader_id': '300043621',
'view_count': int, 'view_count': int,
}, },
} }, {
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)