Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
e55d4db42b
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.07**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.11**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.07.07
|
[debug] youtube-dl version 2016.07.11
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
AUTHORS
1
AUTHORS
@ -177,3 +177,4 @@ Roman Tsiupa
|
|||||||
Artur Krysiak
|
Artur Krysiak
|
||||||
Jakub Adam Wieczorek
|
Jakub Adam Wieczorek
|
||||||
Aleksandar Topuzović
|
Aleksandar Topuzović
|
||||||
|
Nehal Patel
|
||||||
|
@ -224,6 +224,7 @@
|
|||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
- **FiveTV**
|
- **FiveTV**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
|
- **Flipagram**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
- **Formula1**
|
- **Formula1**
|
||||||
@ -553,6 +554,7 @@
|
|||||||
- **RICE**
|
- **RICE**
|
||||||
- **RingTV**
|
- **RingTV**
|
||||||
- **RockstarGames**
|
- **RockstarGames**
|
||||||
|
- **RoosterTeeth**
|
||||||
- **RottenTomatoes**
|
- **RottenTomatoes**
|
||||||
- **Roxwel**
|
- **Roxwel**
|
||||||
- **RTBF**
|
- **RTBF**
|
||||||
|
@ -81,6 +81,7 @@ from youtube_dl.utils import (
|
|||||||
cli_option,
|
cli_option,
|
||||||
cli_valueless_option,
|
cli_valueless_option,
|
||||||
cli_bool_option,
|
cli_bool_option,
|
||||||
|
parse_codecs,
|
||||||
)
|
)
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
@ -608,6 +609,29 @@ class TestUtil(unittest.TestCase):
|
|||||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||||
|
|
||||||
|
def test_parse_codecs(self):
|
||||||
|
self.assertEqual(parse_codecs(''), {})
|
||||||
|
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||||
|
'vcodec': 'avc1.77.30',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('mp4a.40.2'), {
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
|
||||||
|
'vcodec': 'avc1.42001e',
|
||||||
|
'acodec': 'mp4a.40.5',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs('avc3.640028'), {
|
||||||
|
'vcodec': 'avc3.640028',
|
||||||
|
'acodec': 'none',
|
||||||
|
})
|
||||||
|
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
|
||||||
|
'vcodec': 'h264',
|
||||||
|
'acodec': 'aac',
|
||||||
|
})
|
||||||
|
|
||||||
def test_escape_rfc3986(self):
|
def test_escape_rfc3986(self):
|
||||||
reserved = "!*'();:@&=+$,/?#[]"
|
reserved = "!*'();:@&=+$,/?#[]"
|
||||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||||
|
@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||||
_NETRC_MACHINE = 'animeondemand'
|
_NETRC_MACHINE = 'animeondemand'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '161',
|
'id': '161',
|
||||||
@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}, {
|
}, {
|
||||||
# Film wording is used instead of Episode
|
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# Episodes without titles
|
# Episodes without titles, jap, OmU
|
||||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# ger/jap, Dub/OmU, account required
|
# ger/jap, Dub/OmU, account required
|
||||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||||
|
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for num, episode_html in enumerate(re.findall(
|
def extract_info(html, video_id, num=None):
|
||||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
title, description = [None] * 2
|
||||||
episodebox_title = self._search_regex(
|
|
||||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
|
||||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
|
||||||
episode_html, 'episodebox title', default=None, group='title')
|
|
||||||
if not episodebox_title:
|
|
||||||
continue
|
|
||||||
|
|
||||||
episode_number = int(self._search_regex(
|
|
||||||
r'(?:Episode|Film)\s*(\d+)',
|
|
||||||
episodebox_title, 'episode number', default=num))
|
|
||||||
episode_title = self._search_regex(
|
|
||||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
|
||||||
episodebox_title, 'episode title', default=None)
|
|
||||||
|
|
||||||
video_id = 'episode-%d' % episode_number
|
|
||||||
|
|
||||||
common_info = {
|
|
||||||
'id': video_id,
|
|
||||||
'series': anime_title,
|
|
||||||
'episode': episode_title,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
}
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for input_ in re.findall(
|
for input_ in re.findall(
|
||||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||||
attributes = extract_attributes(input_)
|
attributes = extract_attributes(input_)
|
||||||
playlist_urls = []
|
playlist_urls = []
|
||||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||||
@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
format_id_list.append(lang)
|
format_id_list.append(lang)
|
||||||
if kind:
|
if kind:
|
||||||
format_id_list.append(kind)
|
format_id_list.append(kind)
|
||||||
if not format_id_list:
|
if not format_id_list and num is not None:
|
||||||
format_id_list.append(compat_str(num))
|
format_id_list.append(compat_str(num))
|
||||||
format_id = '-'.join(format_id_list)
|
format_id = '-'.join(format_id_list)
|
||||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||||
@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
formats.extend(file_formats)
|
formats.extend(file_formats)
|
||||||
|
|
||||||
if formats:
|
return {
|
||||||
self._sort_formats(formats)
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
def extract_entries(html, video_id, common_info, num=None):
|
||||||
|
info = extract_info(html, video_id, num)
|
||||||
|
|
||||||
|
if info['formats']:
|
||||||
|
self._sort_formats(info['formats'])
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update({
|
f.update(info)
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'formats': formats,
|
|
||||||
})
|
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
# Extract teaser only when full episode is not available
|
# Extract teaser/trailer only when full episode is not available
|
||||||
if not formats:
|
if not info['formats']:
|
||||||
m = re.search(
|
m = re.search(
|
||||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||||
episode_html)
|
html)
|
||||||
if m:
|
if m:
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update({
|
f.update({
|
||||||
'id': '%s-teaser' % f['id'],
|
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||||
'title': m.group('title'),
|
'title': m.group('title'),
|
||||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||||
})
|
})
|
||||||
entries.append(f)
|
entries.append(f)
|
||||||
|
|
||||||
|
def extract_episodes(html):
|
||||||
|
for num, episode_html in enumerate(re.findall(
|
||||||
|
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||||
|
episodebox_title = self._search_regex(
|
||||||
|
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||||
|
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||||
|
episode_html, 'episodebox title', default=None, group='title')
|
||||||
|
if not episodebox_title:
|
||||||
|
continue
|
||||||
|
|
||||||
|
episode_number = int(self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*(\d+)',
|
||||||
|
episodebox_title, 'episode number', default=num))
|
||||||
|
episode_title = self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||||
|
episodebox_title, 'episode title', default=None)
|
||||||
|
|
||||||
|
video_id = 'episode-%d' % episode_number
|
||||||
|
|
||||||
|
common_info = {
|
||||||
|
'id': video_id,
|
||||||
|
'series': anime_title,
|
||||||
|
'episode': episode_title,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
}
|
||||||
|
|
||||||
|
extract_entries(episode_html, video_id, common_info)
|
||||||
|
|
||||||
|
def extract_film(html, video_id):
|
||||||
|
common_info = {
|
||||||
|
'id': anime_id,
|
||||||
|
'title': anime_title,
|
||||||
|
'description': anime_description,
|
||||||
|
}
|
||||||
|
extract_entries(html, video_id, common_info)
|
||||||
|
|
||||||
|
extract_episodes(webpage)
|
||||||
|
|
||||||
|
if not entries:
|
||||||
|
extract_film(webpage, anime_id)
|
||||||
|
|
||||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
|
||||||
@ -34,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||||
@ -44,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||||
'duration': 5252,
|
'duration': 5252,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
# audio
|
# audio
|
||||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||||
@ -55,6 +58,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||||
'duration': 3240,
|
'duration': 3240,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -113,11 +117,14 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
update_url_query(stream_url, {
|
||||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
'hdcore': '3.1.1',
|
||||||
|
'plugin': 'aasp-3.1.1.69.124'
|
||||||
|
}),
|
||||||
|
video_id, f4m_id='hds', fatal=False))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
if server and server.startswith('rtmp'):
|
if server and server.startswith('rtmp'):
|
||||||
f = {
|
f = {
|
||||||
@ -231,7 +238,8 @@ class ARDIE(InfoExtractor):
|
|||||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||||
'upload_date': '20140804',
|
'upload_date': '20140804',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -2,11 +2,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
from .rudo import RudoIE
|
||||||
|
|
||||||
|
|
||||||
class BioBioChileTVIE(InfoExtractor):
|
class BioBioChileTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||||
@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Fernando Atria',
|
'uploader': 'Fernando Atria',
|
||||||
},
|
},
|
||||||
|
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||||
}, {
|
}, {
|
||||||
# different uploader layout
|
# different uploader layout
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||||
@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': '(none)',
|
||||||
|
'upload_date': '20160708',
|
||||||
|
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
rudo_url = RudoIE._extract_url(webpage)
|
||||||
|
if not rudo_url:
|
||||||
|
raise ExtractorError('No videos found')
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||||
|
|
||||||
file_url = self._search_regex(
|
|
||||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
|
||||||
webpage, 'file url', group='url')
|
|
||||||
|
|
||||||
base_url = self._search_regex(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
|
||||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
|
||||||
group='url')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
|
||||||
f = {
|
|
||||||
'url': '%s%s' % (base_url, file_url),
|
|
||||||
'format_id': 'http',
|
|
||||||
'protocol': 'http',
|
|
||||||
'preference': 1,
|
|
||||||
}
|
|
||||||
if formats:
|
|
||||||
f_copy = formats[-1].copy()
|
|
||||||
f_copy.update(f)
|
|
||||||
f = f_copy
|
|
||||||
formats.append(f)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': rudo_url,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
@ -44,6 +44,7 @@ from ..utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
url_basename,
|
url_basename,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
@ -54,6 +55,8 @@ from ..utils import (
|
|||||||
update_Request,
|
update_Request,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
parse_m3u8_attributes,
|
parse_m3u8_attributes,
|
||||||
|
extract_attributes,
|
||||||
|
parse_codecs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -161,6 +164,7 @@ class InfoExtractor(object):
|
|||||||
* "height" (optional, int)
|
* "height" (optional, int)
|
||||||
* "resolution" (optional, string "{width}x{height"},
|
* "resolution" (optional, string "{width}x{height"},
|
||||||
deprecated)
|
deprecated)
|
||||||
|
* "filesize" (optional, int)
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: Full video description.
|
description: Full video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
@ -803,15 +807,17 @@ class InfoExtractor(object):
|
|||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, **kwargs):
|
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||||
json_ld = self._search_regex(
|
json_ld = self._search_regex(
|
||||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
return {}
|
return {}
|
||||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
return self._json_ld(
|
||||||
|
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
||||||
|
expected_type=expected_type)
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
if isinstance(json_ld, compat_str):
|
if isinstance(json_ld, compat_str):
|
||||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
@ -819,6 +825,8 @@ class InfoExtractor(object):
|
|||||||
info = {}
|
info = {}
|
||||||
if json_ld.get('@context') == 'http://schema.org':
|
if json_ld.get('@context') == 'http://schema.org':
|
||||||
item_type = json_ld.get('@type')
|
item_type = json_ld.get('@type')
|
||||||
|
if expected_type is not None and expected_type != item_type:
|
||||||
|
return info
|
||||||
if item_type == 'TVEpisode':
|
if item_type == 'TVEpisode':
|
||||||
info.update({
|
info.update({
|
||||||
'episode': unescapeHTML(json_ld.get('name')),
|
'episode': unescapeHTML(json_ld.get('name')),
|
||||||
@ -837,6 +845,19 @@ class InfoExtractor(object):
|
|||||||
'title': unescapeHTML(json_ld.get('headline')),
|
'title': unescapeHTML(json_ld.get('headline')),
|
||||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||||
})
|
})
|
||||||
|
elif item_type == 'VideoObject':
|
||||||
|
info.update({
|
||||||
|
'url': json_ld.get('contentUrl'),
|
||||||
|
'title': unescapeHTML(json_ld.get('name')),
|
||||||
|
'description': unescapeHTML(json_ld.get('description')),
|
||||||
|
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||||
|
'duration': parse_duration(json_ld.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||||
|
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||||
|
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||||
|
'width': int_or_none(json_ld.get('width')),
|
||||||
|
'height': int_or_none(json_ld.get('height')),
|
||||||
|
})
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -1616,6 +1637,62 @@ class InfoExtractor(object):
|
|||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _parse_html5_media_entries(self, base_url, webpage):
|
||||||
|
def absolute_url(video_url):
|
||||||
|
return compat_urlparse.urljoin(base_url, video_url)
|
||||||
|
|
||||||
|
def parse_content_type(content_type):
|
||||||
|
if not content_type:
|
||||||
|
return {}
|
||||||
|
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||||
|
if ctr:
|
||||||
|
mimetype, codecs = ctr.groups()
|
||||||
|
f = parse_codecs(codecs)
|
||||||
|
f['ext'] = mimetype2ext(mimetype)
|
||||||
|
return f
|
||||||
|
return {}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||||
|
media_info = {
|
||||||
|
'formats': [],
|
||||||
|
'subtitles': {},
|
||||||
|
}
|
||||||
|
media_attributes = extract_attributes(media_tag)
|
||||||
|
src = media_attributes.get('src')
|
||||||
|
if src:
|
||||||
|
media_info['formats'].append({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
'vcodec': 'none' if media_type == 'audio' else None,
|
||||||
|
})
|
||||||
|
media_info['thumbnail'] = media_attributes.get('poster')
|
||||||
|
if media_content:
|
||||||
|
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||||
|
source_attributes = extract_attributes(source_tag)
|
||||||
|
src = source_attributes.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
f = parse_content_type(source_attributes.get('type'))
|
||||||
|
f.update({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
'vcodec': 'none' if media_type == 'audio' else None,
|
||||||
|
})
|
||||||
|
media_info['formats'].append(f)
|
||||||
|
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||||
|
track_attributes = extract_attributes(track_tag)
|
||||||
|
kind = track_attributes.get('kind')
|
||||||
|
if not kind or kind == 'subtitles':
|
||||||
|
src = track_attributes.get('src')
|
||||||
|
if not src:
|
||||||
|
continue
|
||||||
|
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||||
|
media_info['subtitles'].setdefault(lang, []).append({
|
||||||
|
'url': absolute_url(src),
|
||||||
|
})
|
||||||
|
if media_info['formats']:
|
||||||
|
entries.append(media_info)
|
||||||
|
return entries
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
@ -256,6 +256,7 @@ from .fivemin import FiveMinIE
|
|||||||
from .fivetv import FiveTVIE
|
from .fivetv import FiveTVIE
|
||||||
from .fktv import FKTVIE
|
from .fktv import FKTVIE
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
|
from .flipagram import FlipagramIE
|
||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
from .footyroom import FootyRoomIE
|
from .footyroom import FootyRoomIE
|
||||||
from .formula1 import Formula1IE
|
from .formula1 import Formula1IE
|
||||||
@ -679,6 +680,7 @@ from .rice import RICEIE
|
|||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
from .ro220 import Ro220IE
|
from .ro220 import Ro220IE
|
||||||
from .rockstargames import RockstarGamesIE
|
from .rockstargames import RockstarGamesIE
|
||||||
|
from .roosterteeth import RoosterTeethIE
|
||||||
from .rottentomatoes import RottenTomatoesIE
|
from .rottentomatoes import RottenTomatoesIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
@ -689,6 +691,7 @@ from .rtp import RTPIE
|
|||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
|
from .rudo import RudoIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .ruleporn import RulePornIE
|
from .ruleporn import RulePornIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
|
@ -219,12 +219,25 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||||
if m:
|
|
||||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
for m in re.findall(PATTERN, webpage):
|
||||||
|
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||||
data = dict(json.loads(swf_params))
|
data = dict(json.loads(swf_params))
|
||||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||||
video_data = json.loads(params_raw)['video_data']
|
video_data_candidate = json.loads(params_raw)['video_data']
|
||||||
|
for _, f in video_data_candidate.items():
|
||||||
|
if not f:
|
||||||
|
continue
|
||||||
|
if isinstance(f, dict):
|
||||||
|
f = [f]
|
||||||
|
if not isinstance(f, list):
|
||||||
|
continue
|
||||||
|
if f[0].get('video_id') == video_id:
|
||||||
|
video_data = video_data_candidate
|
||||||
|
break
|
||||||
|
if video_data:
|
||||||
|
break
|
||||||
|
|
||||||
def video_data_list2dict(video_data):
|
def video_data_list2dict(video_data):
|
||||||
ret = {}
|
ret = {}
|
||||||
|
115
youtube_dl/extractor/flipagram.py
Normal file
115
youtube_dl/extractor/flipagram.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FlipagramIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||||
|
'md5': '888dcf08b7ea671381f00fab74692755',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nyvTSJMKId',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||||
|
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
|
||||||
|
'duration': 35.571,
|
||||||
|
'timestamp': 1461244995,
|
||||||
|
'upload_date': '20160421',
|
||||||
|
'uploader': 'kitty juria',
|
||||||
|
'uploader_id': 'sjuria101',
|
||||||
|
'creator': 'kitty juria',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'comments': list,
|
||||||
|
'formats': 'mincount:2',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
flipagram = video_data['flipagram']
|
||||||
|
video = flipagram['video']
|
||||||
|
|
||||||
|
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
||||||
|
title = json_ld.get('title') or flipagram['captionText']
|
||||||
|
description = json_ld.get('description') or flipagram.get('captionText')
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': video['url'],
|
||||||
|
'width': int_or_none(video.get('width')),
|
||||||
|
'height': int_or_none(video.get('height')),
|
||||||
|
'filesize': int_or_none(video_data.get('size')),
|
||||||
|
}]
|
||||||
|
|
||||||
|
preview_url = try_get(
|
||||||
|
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
|
||||||
|
if preview_url:
|
||||||
|
formats.append({
|
||||||
|
'url': preview_url,
|
||||||
|
'ext': 'm4a',
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
counts = flipagram.get('counts', {})
|
||||||
|
user = flipagram.get('user', {})
|
||||||
|
video_data = flipagram.get('video', {})
|
||||||
|
|
||||||
|
thumbnails = [{
|
||||||
|
'url': self._proto_relative_url(cover['url']),
|
||||||
|
'width': int_or_none(cover.get('width')),
|
||||||
|
'height': int_or_none(cover.get('height')),
|
||||||
|
'filesize': int_or_none(cover.get('size')),
|
||||||
|
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||||
|
|
||||||
|
# Note that this only retrieves comments that are initally loaded.
|
||||||
|
# For videos with large amounts of comments, most won't be retrieved.
|
||||||
|
comments = []
|
||||||
|
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||||
|
text = comment.get('comment')
|
||||||
|
if not text or not isinstance(text, list):
|
||||||
|
continue
|
||||||
|
comments.append({
|
||||||
|
'author': comment.get('user', {}).get('name'),
|
||||||
|
'author_id': comment.get('user', {}).get('username'),
|
||||||
|
'id': comment.get('id'),
|
||||||
|
'text': text[0],
|
||||||
|
'timestamp': unified_timestamp(comment.get('created')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': float_or_none(flipagram.get('duration'), 1000),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
|
||||||
|
'uploader': user.get('name'),
|
||||||
|
'uploader_id': user.get('username'),
|
||||||
|
'creator': user.get('name'),
|
||||||
|
'view_count': int_or_none(counts.get('plays')),
|
||||||
|
'like_count': int_or_none(counts.get('likes')),
|
||||||
|
'repost_count': int_or_none(counts.get('reflips')),
|
||||||
|
'comment_count': int_or_none(counts.get('comments')),
|
||||||
|
'comments': comments,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1313,6 +1313,38 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Non-standard Vimeo embed
|
||||||
|
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||||
|
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '148867247',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Understanding the web - Teaser',
|
||||||
|
'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
|
||||||
|
'upload_date': '20151214',
|
||||||
|
'uploader': 'OpenClassrooms',
|
||||||
|
'uploader_id': 'openclassrooms',
|
||||||
|
},
|
||||||
|
'add_ie': ['Vimeo'],
|
||||||
|
},
|
||||||
|
# {
|
||||||
|
# # TODO: find another test
|
||||||
|
# # http://schema.org/VideoObject
|
||||||
|
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||||
|
# 'md5': '888dcf08b7ea671381f00fab74692755',
|
||||||
|
# 'info_dict': {
|
||||||
|
# 'id': 'nyvTSJMKId',
|
||||||
|
# 'ext': 'mp4',
|
||||||
|
# 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||||
|
# 'description': '#love for cats.',
|
||||||
|
# 'timestamp': 1461244995,
|
||||||
|
# 'upload_date': '20160421',
|
||||||
|
# },
|
||||||
|
# 'params': {
|
||||||
|
# 'force_generic_extractor': True,
|
||||||
|
# },
|
||||||
|
# }
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
@ -2157,6 +2189,19 @@ class GenericIE(InfoExtractor):
|
|||||||
if embed_url:
|
if embed_url:
|
||||||
return self.url_result(embed_url)
|
return self.url_result(embed_url)
|
||||||
|
|
||||||
|
# Looking for http://schema.org/VideoObject
|
||||||
|
json_ld = self._search_json_ld(
|
||||||
|
webpage, video_id, default=None, expected_type='VideoObject')
|
||||||
|
if json_ld and json_ld.get('url'):
|
||||||
|
info_dict.update({
|
||||||
|
'title': video_title or info_dict['title'],
|
||||||
|
'description': video_description,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'age_limit': age_limit
|
||||||
|
})
|
||||||
|
info_dict.update(json_ld)
|
||||||
|
return info_dict
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
@ -23,6 +23,7 @@ from ..utils import (
|
|||||||
str_or_none,
|
str_or_none,
|
||||||
url_basename,
|
url_basename,
|
||||||
urshift,
|
urshift,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -89,6 +90,10 @@ class LeIE(InfoExtractor):
|
|||||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||||
return _loc3_
|
return _loc3_
|
||||||
|
|
||||||
|
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||||
|
def get_mms_key(self, time):
|
||||||
|
return self.ror(time, 8) ^ 185025305
|
||||||
|
|
||||||
# see M3U8Encryption class in KLetvPlayer.swf
|
# see M3U8Encryption class in KLetvPlayer.swf
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def decrypt_m3u8(encrypted_data):
|
def decrypt_m3u8(encrypted_data):
|
||||||
@ -109,23 +114,7 @@ class LeIE(InfoExtractor):
|
|||||||
|
|
||||||
return bytes(_loc7_)
|
return bytes(_loc7_)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _check_errors(self, play_json):
|
||||||
media_id = self._match_id(url)
|
|
||||||
page = self._download_webpage(url, media_id)
|
|
||||||
params = {
|
|
||||||
'id': media_id,
|
|
||||||
'platid': 1,
|
|
||||||
'splatid': 101,
|
|
||||||
'format': 1,
|
|
||||||
'tkey': self.calc_time_key(int(time.time())),
|
|
||||||
'domain': 'www.le.com'
|
|
||||||
}
|
|
||||||
|
|
||||||
play_json = self._download_json(
|
|
||||||
'http://api.le.com/mms/out/video/playJson',
|
|
||||||
media_id, 'Downloading playJson data', query=params,
|
|
||||||
headers=self.geo_verification_headers())
|
|
||||||
|
|
||||||
# Check for errors
|
# Check for errors
|
||||||
playstatus = play_json['playstatus']
|
playstatus = play_json['playstatus']
|
||||||
if playstatus['status'] == 0:
|
if playstatus['status'] == 0:
|
||||||
@ -136,43 +125,99 @@ class LeIE(InfoExtractor):
|
|||||||
msg = 'Generic error. flag = %d' % flag
|
msg = 'Generic error. flag = %d' % flag
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
playurl = play_json['playurl']
|
def _real_extract(self, url):
|
||||||
|
media_id = self._match_id(url)
|
||||||
|
page = self._download_webpage(url, media_id)
|
||||||
|
|
||||||
formats = ['350', '1000', '1300', '720p', '1080p']
|
play_json_h5 = self._download_json(
|
||||||
dispatch = playurl['dispatch']
|
'http://api.le.com/mms/out/video/playJsonH5',
|
||||||
|
media_id, 'Downloading html5 playJson data', query={
|
||||||
|
'id': media_id,
|
||||||
|
'platid': 3,
|
||||||
|
'splatid': 304,
|
||||||
|
'format': 1,
|
||||||
|
'tkey': self.get_mms_key(int(time.time())),
|
||||||
|
'domain': 'www.le.com',
|
||||||
|
'tss': 'no',
|
||||||
|
},
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
self._check_errors(play_json_h5)
|
||||||
|
|
||||||
urls = []
|
play_json_flash = self._download_json(
|
||||||
for format_id in formats:
|
'http://api.le.com/mms/out/video/playJson',
|
||||||
if format_id in dispatch:
|
media_id, 'Downloading flash playJson data', query={
|
||||||
media_url = playurl['domain'][0] + dispatch[format_id][0]
|
'id': media_id,
|
||||||
media_url += '&' + compat_urllib_parse_urlencode({
|
'platid': 1,
|
||||||
'm3v': 1,
|
'splatid': 101,
|
||||||
|
'format': 1,
|
||||||
|
'tkey': self.calc_time_key(int(time.time())),
|
||||||
|
'domain': 'www.le.com',
|
||||||
|
},
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
self._check_errors(play_json_flash)
|
||||||
|
|
||||||
|
def get_h5_urls(media_url, format_id):
|
||||||
|
location = self._download_json(
|
||||||
|
media_url, media_id,
|
||||||
|
'Download JSON metadata for format %s' % format_id, query={
|
||||||
'format': 1,
|
'format': 1,
|
||||||
'expect': 3,
|
'expect': 3,
|
||||||
'rateid': format_id,
|
'tss': 'no',
|
||||||
})
|
})['location']
|
||||||
|
|
||||||
nodes_data = self._download_json(
|
return {
|
||||||
media_url, media_id,
|
'http': update_url_query(location, {'tss': 'no'}),
|
||||||
'Download JSON metadata for format %s' % format_id)
|
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||||
|
}
|
||||||
|
|
||||||
req = self._request_webpage(
|
def get_flash_urls(media_url, format_id):
|
||||||
nodes_data['nodelist'][0]['location'], media_id,
|
media_url += '&' + compat_urllib_parse_urlencode({
|
||||||
note='Downloading m3u8 information for format %s' % format_id)
|
'm3v': 1,
|
||||||
|
'format': 1,
|
||||||
|
'expect': 3,
|
||||||
|
'rateid': format_id,
|
||||||
|
})
|
||||||
|
|
||||||
m3u8_data = self.decrypt_m3u8(req.read())
|
nodes_data = self._download_json(
|
||||||
|
media_url, media_id,
|
||||||
|
'Download JSON metadata for format %s' % format_id)
|
||||||
|
|
||||||
url_info_dict = {
|
req = self._request_webpage(
|
||||||
'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
nodes_data['nodelist'][0]['location'], media_id,
|
||||||
'ext': determine_ext(dispatch[format_id][1]),
|
note='Downloading m3u8 information for format %s' % format_id)
|
||||||
'format_id': format_id,
|
|
||||||
'protocol': 'm3u8',
|
|
||||||
}
|
|
||||||
|
|
||||||
if format_id[-1:] == 'p':
|
m3u8_data = self.decrypt_m3u8(req.read())
|
||||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
|
||||||
|
|
||||||
urls.append(url_info_dict)
|
return {
|
||||||
|
'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||||
|
}
|
||||||
|
|
||||||
|
extracted_formats = []
|
||||||
|
formats = []
|
||||||
|
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||||
|
playurl = play_json['playurl']
|
||||||
|
play_domain = playurl['domain'][0]
|
||||||
|
|
||||||
|
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||||
|
if format_id in extracted_formats:
|
||||||
|
continue
|
||||||
|
extracted_formats.append(format_id)
|
||||||
|
|
||||||
|
media_url = play_domain + format_data[0]
|
||||||
|
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||||
|
f = {
|
||||||
|
'url': format_url,
|
||||||
|
'ext': determine_ext(format_data[1]),
|
||||||
|
'format_id': '%s-%s' % (protocol, format_id),
|
||||||
|
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||||
|
'quality': int_or_none(format_id),
|
||||||
|
}
|
||||||
|
|
||||||
|
if format_id[-1:] == 'p':
|
||||||
|
f['height'] = int_or_none(format_id[:-1])
|
||||||
|
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats, ('height', 'quality', 'format_id'))
|
||||||
|
|
||||||
publish_time = parse_iso8601(self._html_search_regex(
|
publish_time = parse_iso8601(self._html_search_regex(
|
||||||
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
||||||
@ -181,7 +226,7 @@ class LeIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'formats': urls,
|
'formats': formats,
|
||||||
'title': playurl['title'],
|
'title': playurl['title'],
|
||||||
'thumbnail': playurl['pic'],
|
'thumbnail': playurl['pic'],
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -100,7 +100,7 @@ class LyndaIE(LyndaBaseIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||||
'md5': '679734f6786145da3546585de9a356be',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114408',
|
'id': '114408',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -26,7 +26,8 @@ class MGTVIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'http://v.api.mgtv.com/player/video', video_id,
|
'http://v.api.mgtv.com/player/video', video_id,
|
||||||
query={'video_id': video_id})['data']
|
query={'video_id': video_id},
|
||||||
|
headers=self.geo_verification_headers())['data']
|
||||||
info = api_data['info']
|
info = api_data['info']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import random
|
import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
xpath_text,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -18,13 +19,16 @@ class MioMioIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# "type=video" in flashvars
|
# "type=video" in flashvars
|
||||||
'url': 'http://www.miomio.tv/watch/cc88912/',
|
'url': 'http://www.miomio.tv/watch/cc88912/',
|
||||||
'md5': '317a5f7f6b544ce8419b784ca8edae65',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '88912',
|
'id': '88912',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
||||||
'duration': 5923,
|
'duration': 5923,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# The server provides broken file
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.miomio.tv/watch/cc184024/',
|
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -32,7 +36,7 @@ class MioMioIE(InfoExtractor):
|
|||||||
'title': '《动漫同人插画绘制》',
|
'title': '《动漫同人插画绘制》',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 86,
|
'playlist_mincount': 86,
|
||||||
'skip': 'This video takes time too long for retrieving the URL',
|
'skip': 'Unable to load videos',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.miomio.tv/watch/cc173113/',
|
'url': 'http://www.miomio.tv/watch/cc173113/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -40,20 +44,23 @@ class MioMioIE(InfoExtractor):
|
|||||||
'title': 'The New Macbook 2015 上手试玩与简评'
|
'title': 'The New Macbook 2015 上手试玩与简评'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
|
'skip': 'Unable to load videos',
|
||||||
|
}, {
|
||||||
|
# new 'h5' player
|
||||||
|
'url': 'http://www.miomio.tv/watch/cc273295/',
|
||||||
|
'md5': '',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '273295',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'アウト×デラックス 20160526',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# intermittent HTTP 500
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _extract_mioplayer(self, webpage, video_id, title, http_headers):
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._html_search_meta(
|
|
||||||
'description', webpage, 'title', fatal=True)
|
|
||||||
|
|
||||||
mioplayer_path = self._search_regex(
|
|
||||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
|
||||||
|
|
||||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
|
||||||
|
|
||||||
xml_config = self._search_regex(
|
xml_config = self._search_regex(
|
||||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||||
webpage, 'xml config')
|
webpage, 'xml config')
|
||||||
@ -92,10 +99,34 @@ class MioMioIE(InfoExtractor):
|
|||||||
'http_headers': http_headers,
|
'http_headers': http_headers,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
mioplayer_path = self._search_regex(
|
||||||
|
r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
|
||||||
|
|
||||||
|
if '_h5' in mioplayer_path:
|
||||||
|
player_url = compat_urlparse.urljoin(url, mioplayer_path)
|
||||||
|
player_webpage = self._download_webpage(
|
||||||
|
player_url, video_id,
|
||||||
|
note='Downloading player webpage', headers={'Referer': url})
|
||||||
|
entries = self._parse_html5_media_entries(player_url, player_webpage)
|
||||||
|
http_headers = {'Referer': player_url}
|
||||||
|
else:
|
||||||
|
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||||
|
entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
|
||||||
|
|
||||||
if len(entries) == 1:
|
if len(entries) == 1:
|
||||||
segment = entries[0]
|
segment = entries[0]
|
||||||
segment['id'] = video_id
|
segment['id'] = video_id
|
||||||
segment['title'] = title
|
segment['title'] = title
|
||||||
|
segment['http_headers'] = http_headers
|
||||||
return segment
|
return segment
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -8,7 +8,7 @@ from ..utils import update_url_query
|
|||||||
|
|
||||||
class NickIE(MTVServicesInfoExtractor):
|
class NickIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'nick.com'
|
IE_NAME = 'nick.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)'
|
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
||||||
@ -52,6 +52,9 @@ class NickIE(MTVServicesInfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class PlayvidIE(InfoExtractor):
|
class PlayvidIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
|
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
|
||||||
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
|
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -24,8 +24,19 @@ class PlayvidIE(InfoExtractor):
|
|||||||
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
|
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
|
||||||
'duration': 82,
|
'duration': 82,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
}
|
'skip': 'Video removed due to ToS',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
|
||||||
|
'md5': '39d49df503ad7b8f23a4432cbf046477',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hwb0GpNkzgH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -33,6 +33,7 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
'timestamp': 1456594200,
|
'timestamp': 1456594200,
|
||||||
'upload_date': '20160227',
|
'upload_date': '20160227',
|
||||||
'duration': 2364,
|
'duration': 2364,
|
||||||
|
'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$'
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
@ -68,6 +69,8 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||||
webpage, 'timestamp', fatal=False))
|
webpage, 'timestamp', fatal=False))
|
||||||
|
|
||||||
|
thumbnail_url = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
media_urls = set()
|
media_urls = set()
|
||||||
@ -87,6 +90,7 @@ class PolskieRadioIE(InfoExtractor):
|
|||||||
'duration': int_or_none(media.get('length')),
|
'duration': int_or_none(media.get('length')),
|
||||||
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
|
'thumbnail': thumbnail_url
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._og_search_title(webpage).strip()
|
title = self._og_search_title(webpage).strip()
|
||||||
|
148
youtube_dl/extractor/roosterteeth.py
Normal file
148
youtube_dl/extractor/roosterteeth.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RoosterTeethIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
|
||||||
|
_LOGIN_URL = 'https://roosterteeth.com/login'
|
||||||
|
_NETRC_MACHINE = 'roosterteeth'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||||
|
'md5': 'e2bd7764732d785ef797700a2489f212',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '26576',
|
||||||
|
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
|
||||||
|
'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
|
||||||
|
'thumbnail': 're:^https?://.*\.png$',
|
||||||
|
'series': 'Million Dollars, But...',
|
||||||
|
'episode': 'Million Dollars, But... The Game Announcement',
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# only available for FIRST members
|
||||||
|
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None,
|
||||||
|
note='Downloading login page',
|
||||||
|
errnote='Unable to download login page')
|
||||||
|
|
||||||
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
|
login_form.update({
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
})
|
||||||
|
|
||||||
|
login_request = self._download_webpage(
|
||||||
|
self._LOGIN_URL, None,
|
||||||
|
note='Logging in as %s' % username,
|
||||||
|
data=urlencode_postdata(login_form),
|
||||||
|
headers={
|
||||||
|
'Referer': self._LOGIN_URL,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not any(re.search(p, login_request) for p in (
|
||||||
|
r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
|
||||||
|
r'>Sign Out<')):
|
||||||
|
error = self._html_search_regex(
|
||||||
|
r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
|
||||||
|
login_request, 'alert', default=None, group='error')
|
||||||
|
if error:
|
||||||
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
episode = strip_or_none(unescapeHTML(self._search_regex(
|
||||||
|
(r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
|
r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
|
||||||
|
default=None, group='title')))
|
||||||
|
|
||||||
|
title = strip_or_none(self._og_search_title(
|
||||||
|
webpage, default=None)) or episode
|
||||||
|
|
||||||
|
m3u8_url = self._search_regex(
|
||||||
|
r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
|
||||||
|
webpage, 'm3u8 url', default=None, group='url')
|
||||||
|
|
||||||
|
if not m3u8_url:
|
||||||
|
if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
|
||||||
|
self.raise_login_required(
|
||||||
|
'%s is only available for FIRST members' % display_id)
|
||||||
|
|
||||||
|
if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
|
||||||
|
self.raise_login_required('%s is not available yet' % display_id)
|
||||||
|
|
||||||
|
raise ExtractorError('Unable to extract m3u8 URL')
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, display_id, ext='mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = strip_or_none(self._og_search_description(webpage))
|
||||||
|
thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
|
||||||
|
|
||||||
|
series = self._search_regex(
|
||||||
|
(r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
|
||||||
|
webpage, 'series', fatal=False)
|
||||||
|
|
||||||
|
comment_count = int_or_none(self._search_regex(
|
||||||
|
r'>Comments \((\d+)\)<', webpage,
|
||||||
|
'comment count', fatal=False))
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
(r'containerId\s*=\s*["\']episode-(\d+)\1',
|
||||||
|
r'<div[^<]+id=["\']episode-(\d+)'), webpage,
|
||||||
|
'video id', default=display_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'series': series,
|
||||||
|
'episode': episode,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
53
youtube_dl/extractor/rudo.py
Normal file
53
youtube_dl/extractor/rudo.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .jwplatform import JWPlatformBaseIE
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
get_element_by_class,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RudoIE(JWPlatformBaseIE):
|
||||||
|
_VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://rudo.video/vod/oTzw0MGnyG',
|
||||||
|
'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'oTzw0MGnyG',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Comentario Tomás Mosciatti',
|
||||||
|
'upload_date': '20160617',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_url(self, webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
|
||||||
|
|
||||||
|
jwplayer_data = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
|
||||||
|
transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
|
||||||
|
|
||||||
|
info_dict = self._parse_jwplayer_data(
|
||||||
|
jwplayer_data, video_id, require_title=False, m3u8_id='hls')
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'upload_date': unified_strdate(get_element_by_class('date', webpage)),
|
||||||
|
})
|
||||||
|
|
||||||
|
return info_dict
|
@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
class SRMediathekIE(ARDMediathekIE):
|
class SRMediathekIE(ARDMediathekIE):
|
||||||
IE_NAME = 'sr:mediathek'
|
IE_NAME = 'sr:mediathek'
|
||||||
IE_DESC = 'Saarländischer Rundfunk'
|
IE_DESC = 'Saarländischer Rundfunk'
|
||||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
||||||
@ -35,7 +35,9 @@ class SRMediathekIE(ARDMediathekIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest']
|
}, {
|
||||||
|
'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -9,8 +9,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class VidziIE(JWPlatformBaseIE):
|
class VidziIE(JWPlatformBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -22,12 +22,16 @@ class VidziIE(JWPlatformBaseIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
|
||||||
|
'skip_download': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(
|
||||||
|
'http://vidzi.tv/%s' % video_id, video_id)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||||
|
|
||||||
|
@ -364,6 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group(1)
|
return mobj.group(1)
|
||||||
|
# Look more for non-standard embedded Vimeo player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
def _verify_player_video_password(self, url, video_id):
|
def _verify_player_video_password(self, url, video_id):
|
||||||
password = self._downloader.params.get('videopassword')
|
password = self._downloader.params.get('videopassword')
|
||||||
|
@ -9,7 +9,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor):
|
|||||||
'id': '922692425',
|
'id': '922692425',
|
||||||
'ext': '3gp',
|
'ext': '3gp',
|
||||||
'title': 'The Toy Soldiers - Hollywood Movie Trailer',
|
'title': 'The Toy Soldiers - Hollywood Movie Trailer',
|
||||||
'duration': 180,
|
'duration': 177,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,34 +46,21 @@ class VuClipIE(InfoExtractor):
|
|||||||
'%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
'%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
||||||
|
|
||||||
# These clowns alternate between two page types
|
# These clowns alternate between two page types
|
||||||
links_code = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'''(?xs)
|
r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
|
||||||
(?:
|
webpage, 'video URL', default=None)
|
||||||
<img\s+src="[^"]*/play.gif".*?>|
|
if video_url:
|
||||||
<!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
|
formats = [{
|
||||||
)
|
'url': video_url,
|
||||||
(.*?)
|
}]
|
||||||
(?:
|
else:
|
||||||
<a\s+href="fblike|<div\s+class="social">
|
formats = self._parse_html5_media_entries(url, webpage)[0]['formats']
|
||||||
)
|
|
||||||
''', webpage, 'links')
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
|
|
||||||
|
|
||||||
quality_order = qualities(['Reg', 'Hi'])
|
title = remove_end(self._html_search_regex(
|
||||||
formats = []
|
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
|
||||||
for url, q in re.findall(
|
|
||||||
r'<a\s+href="(?P<url>[^"]+)".*?>(?:<button[^>]*>)?(?P<q>[^<]+)(?:</button>)?</a>', links_code):
|
|
||||||
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': url,
|
|
||||||
'quality': quality_order(q),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._html_search_regex(
|
||||||
r'\(([0-9:]+)\)</span>', webpage, 'duration', fatal=False))
|
r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -137,7 +137,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
# Two-Factor
|
# Two-Factor
|
||||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
|
||||||
tfa_code = self._get_tfa_info('2-step verification code')
|
tfa_code = self._get_tfa_info('2-step verification code')
|
||||||
|
|
||||||
if not tfa_code:
|
if not tfa_code:
|
||||||
@ -165,17 +165,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
if tfa_results is False:
|
if tfa_results is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
|
||||||
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
||||||
return False
|
return False
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||||
return False
|
return False
|
||||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||||
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
@ -1978,10 +1978,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
|
return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
|
||||||
else super(YoutubeChannelIE, cls).suitable(url))
|
else super(YoutubeChannelIE, cls).suitable(url))
|
||||||
|
|
||||||
|
def _build_template_url(self, url, channel_id):
    # Base implementation: the channel listing URL depends only on the
    # channel id; the original URL is ignored here (subclasses may use it).
    return self._TEMPLATE_URL % channel_id
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
url = self._TEMPLATE_URL % channel_id
|
url = self._build_template_url(url, channel_id)
|
||||||
|
|
||||||
# Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
|
# Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
|
||||||
# Workaround by extracting as a playlist if managed to obtain channel playlist URL
|
# Workaround by extracting as a playlist if managed to obtain channel playlist URL
|
||||||
@ -2038,8 +2041,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubeUserIE(YoutubeChannelIE):
|
class YoutubeUserIE(YoutubeChannelIE):
|
||||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -2049,12 +2052,24 @@ class YoutubeUserIE(YoutubeChannelIE):
|
|||||||
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
|
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
|
||||||
'title': 'Uploads from The Linux Foundation',
|
'title': 'Uploads from The Linux Foundation',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Only available via https://www.youtube.com/c/12minuteathlete/videos
|
||||||
|
# but not https://www.youtube.com/user/12minuteathlete/videos
|
||||||
|
'url': 'https://www.youtube.com/c/12minuteathlete/videos',
|
||||||
|
'playlist_mincount': 249,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
|
||||||
|
'title': 'Uploads from 12 Minute Athlete',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'ytuser:phihag',
|
'url': 'ytuser:phihag',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/c/gametrailers',
|
'url': 'https://www.youtube.com/c/gametrailers',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/gametrailers',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# This channel is not available.
|
# This channel is not available.
|
||||||
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
|
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
|
||||||
@ -2071,6 +2086,10 @@ class YoutubeUserIE(YoutubeChannelIE):
|
|||||||
else:
|
else:
|
||||||
return super(YoutubeUserIE, cls).suitable(url)
|
return super(YoutubeUserIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _build_template_url(self, url, channel_id):
    # Re-match the URL to learn which prefix ('user' or 'c') the caller
    # actually used, falling back to 'user' for ytuser:/bare forms, and
    # fill both slots of the two-part template.
    m = re.match(self._VALID_URL, url)
    return self._TEMPLATE_URL % (m.group('user') or 'user', m.group('id'))
|
||||||
|
|
||||||
|
|
||||||
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com live streams'
|
IE_DESC = 'YouTube.com live streams'
|
||||||
|
@ -26,7 +26,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
except IOError:
|
except IOError:
|
||||||
return default # silently skip if file is not present
|
return default # silently skip if file is not present
|
||||||
try:
|
try:
|
||||||
res = compat_shlex_split(optionf.read(), comments=True)
|
# FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
|
||||||
|
contents = optionf.read()
|
||||||
|
if sys.version_info < (3,):
|
||||||
|
contents = contents.decode(preferredencoding())
|
||||||
|
res = compat_shlex_split(contents, comments=True)
|
||||||
finally:
|
finally:
|
||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
|
@ -363,8 +363,10 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
|||||||
input_files = [filename] + sub_filenames
|
input_files = [filename] + sub_filenames
|
||||||
|
|
||||||
opts = [
|
opts = [
|
||||||
'-map', '0',
|
'-map', '0:v',
|
||||||
'-c', 'copy',
|
'-c:v', 'copy',
|
||||||
|
'-map', '0:a',
|
||||||
|
'-c:a', 'copy',
|
||||||
# Don't copy the existing subtitles, we may be running the
|
# Don't copy the existing subtitles, we may be running the
|
||||||
# postprocessor a second time
|
# postprocessor a second time
|
||||||
'-map', '-0:s',
|
'-map', '-0:s',
|
||||||
|
@ -2126,6 +2126,42 @@ def mimetype2ext(mt):
|
|||||||
}.get(res, res)
|
}.get(res, res)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_codecs(codecs_str):
    """Parse an RFC 6381 ``codecs`` parameter string.

    Returns a dict with ``vcodec`` and ``acodec`` entries (the full codec
    string, or ``'none'`` when that track is known to be absent), or an
    empty dict when nothing can be determined.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    # Comprehension replaces filter(None, map(lambda str: ...)) which
    # shadowed the builtin `str`; also fixes the 'splited' typo.
    split_codecs = [
        c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # RFC 6381 codec IDs are dotted (e.g. avc1.42E01E); the first
        # component identifies the codec family.
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized at all.
        # NOTE(review): both fallback branches below return None for the
        # unrecognized codec string(s) (vcodec/acodec are still None here);
        # kept as-is to preserve the original behavior.
        if len(split_codecs) == 2:
            return {
                'vcodec': vcodec,
                'acodec': acodec,
            }
        elif len(split_codecs) == 1:
            return {
                'vcodec': 'none',
                'acodec': vcodec,
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
|
||||||
|
|
||||||
|
|
||||||
def urlhandle_detect_ext(url_handle):
|
def urlhandle_detect_ext(url_handle):
|
||||||
getheader = url_handle.headers.get
|
getheader = url_handle.headers.get
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.07.07'
|
__version__ = '2016.07.11'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user