Merge branch 'master' into GoogleDrive-issue-13619

This commit is contained in:
Parmjit Virk 2017-08-04 19:47:05 -05:00
commit 9cfc81df17
12 changed files with 123 additions and 93 deletions

View File

@ -584,7 +584,7 @@ If you are using an output template inside a Windows batch file then you must es
#### Output template examples
Note on Windows you may need to use double quotes instead of single.
Note that on Windows you may need to use double quotes instead of single.
```bash
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@ -671,7 +671,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2
#### Format selection examples
Note on Windows you may need to use double quotes instead of single.
Note that on Windows you may need to use double quotes instead of single.
```bash
# Download best mp4 format available or any other best if no mp4 available

View File

@ -371,6 +371,19 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'format': 'best[height>360]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_format_selection_issue_10083(self):
# See https://github.com/rg3/youtube-dl/issues/10083
formats = [
{'format_id': 'regular', 'height': 360, 'url': TEST_URL},
{'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
]
info_dict = _make_result(formats)
ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
ydl.process_ie_result(info_dict.copy())
self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
def test_invalid_format_specs(self):
def assert_syntax_error(format_spec):
ydl = YDL({'format': format_spec})

View File

@ -2,6 +2,7 @@ from __future__ import unicode_literals
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import urljoin
class DashSegmentsFD(FragmentFD):
@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
'total_frags': len(segments),
'total_frags': len(fragments),
}
self._prepare_and_start_frag_download(ctx)
@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
for i, segment in enumerate(segments):
for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
return False
self._append_fragment(ctx, frag_content)

View File

@ -1892,9 +1892,13 @@ class InfoExtractor(object):
'Bandwidth': bandwidth,
}
def location_key(location):
return 'url' if re.match(r'^https?://', location) else 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
media_location_key = location_key(media_template)
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
@ -1904,7 +1908,7 @@ class InfoExtractor(object):
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
'url': media_template % {
media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
@ -1928,7 +1932,7 @@ class InfoExtractor(object):
'Number': segment_number,
}
representation_ms_info['fragments'].append({
'url': segment_url,
media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
@ -1952,8 +1956,9 @@ class InfoExtractor(object):
for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({
'url': representation_ms_info['segment_urls'][segment_index],
location_key(segment_uri): segment_uri,
'duration': duration,
})
segment_index += 1
@ -1962,6 +1967,7 @@ class InfoExtractor(object):
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
})
@ -1969,10 +1975,8 @@ class InfoExtractor(object):
initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
f['fragments'].append({'url': initialization_url})
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats

View File

@ -994,7 +994,6 @@ from .teachertube import (
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE

View File

@ -15,7 +15,7 @@ class MLBIE(InfoExtractor):
(?:[\da-z_-]+\.)*mlb\.com/
(?:
(?:
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
(?:
shared/video/embed/(?:embed|m-internal-embed)\.html|
(?:[^/]+/)+(?:play|index)\.jsp|
@ -84,7 +84,7 @@ class MLBIE(InfoExtractor):
},
{
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
'md5': 'b190e70141fb9a1552a85426b4da1b5d',
'md5': 'aafaf5b0186fee8f32f20508092f8111',
'info_dict': {
'id': '75609783',
'ext': 'mp4',
@ -94,6 +94,10 @@ class MLBIE(InfoExtractor):
'upload_date': '20150415',
}
},
{
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
'only_matching': True,
},
{
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
'only_matching': True,

View File

@ -189,7 +189,7 @@ class PBSIE(InfoExtractor):
# Direct video URL
(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
@ -345,6 +345,21 @@ class PBSIE(InfoExtractor):
'formats': 'mincount:8',
},
},
{
# https://github.com/rg3/youtube-dl/issues/13801
'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
'info_dict': {
'id': '3003333873',
'ext': 'mp4',
'title': 'PBS NewsHour - full episode July 31, 2017',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'duration': 3265,
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True,
@ -433,6 +448,9 @@ class PBSIE(InfoExtractor):
if url:
break
if not url:
url = self._og_search_url(webpage)
mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id')

View File

@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
sources = self._parse_json(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
webpage, 'sources', default='{}')), video_id)
if not sources:

View File

@ -1,48 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import unified_strdate
class TeamFourStarIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
_TEST = {
'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
'info_dict': {
'id': '0WdZO31W',
'title': 'TFS Abridged Parody Episode 1',
'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
'ext': 'mp4',
'timestamp': 1394168400,
'upload_date': '20080508',
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
jwplatform_url = JWPlatformIE._extract_url(webpage)
video_title = self._html_search_regex(
r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
webpage, 'title')
video_date = unified_strdate(self._html_search_regex(
r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
webpage, 'date', fatal=False))
video_description = self._html_search_regex(
r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
webpage, 'description', fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': jwplatform_url,
}

View File

@ -15,6 +15,7 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
js_to_json,
sanitized_Request,
unescapeHTML,
urlencode_postdata,
@ -268,6 +269,25 @@ class UdemyIE(InfoExtractor):
f = add_output_format_meta(f, format_id)
formats.append(f)
def extract_subtitles(track_list):
if not isinstance(track_list, list):
return
for track in track_list:
if not isinstance(track, dict):
continue
if track.get('kind') != 'captions':
continue
src = track.get('src')
if not src or not isinstance(src, compat_str):
continue
lang = track.get('language') or track.get(
'srclang') or track.get('label')
sub_dict = automatic_captions if track.get(
'autogenerated') is True else subtitles
sub_dict.setdefault(lang, []).append({
'url': src,
})
download_urls = asset.get('download_urls')
if isinstance(download_urls, dict):
extract_formats(download_urls.get('Video'))
@ -315,23 +335,16 @@ class UdemyIE(InfoExtractor):
extract_formats(data.get('sources'))
if not duration:
duration = int_or_none(data.get('duration'))
tracks = data.get('tracks')
if isinstance(tracks, list):
for track in tracks:
if not isinstance(track, dict):
continue
if track.get('kind') != 'captions':
continue
src = track.get('src')
if not src or not isinstance(src, compat_str):
continue
lang = track.get('language') or track.get(
'srclang') or track.get('label')
sub_dict = automatic_captions if track.get(
'autogenerated') is True else subtitles
sub_dict.setdefault(lang, []).append({
'url': src,
})
extract_subtitles(data.get('tracks'))
if not subtitles and not automatic_captions:
text_tracks = self._parse_json(
self._search_regex(
r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
'text tracks', default='{}', group='data'), video_id,
transform_source=lambda s: js_to_json(unescapeHTML(s)),
fatal=False)
extract_subtitles(text_tracks)
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))

View File

@ -3,7 +3,10 @@ from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
@ -161,13 +164,28 @@ class VidmeIE(InfoExtractor):
'or for violating the terms of use.',
expected=True)
formats = [{
'format_id': f.get('type'),
'url': f['uri'],
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'preference': 0 if f.get('type', '').endswith('clip') else 1,
} for f in video.get('formats', []) if f.get('uri')]
formats = []
for f in video.get('formats', []):
format_url = f.get('uri')
if not format_url or not isinstance(format_url, compat_str):
continue
format_type = f.get('type')
if format_type == 'dash':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
elif format_type == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': f.get('type'),
'url': format_url,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'preference': 0 if f.get('type', '').endswith(
'clip') else 1,
})
if not formats and video.get('complete_url'):
formats.append({

View File

@ -13,9 +13,9 @@ from ..utils import (
class YandexDiskIE(InfoExtractor):
_VALID_URL = r'https?://yadi\.sk/i/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
_TEST = {
_TESTS = [{
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
'md5': '33955d7ae052f15853dc41f35f17581c',
'info_dict': {
@ -27,7 +27,10 @@ class YandexDiskIE(InfoExtractor):
'uploader_id': '300043621',
'view_count': int,
},
}
}, {
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)