Merge remote-tracking branch 'upstream/master'

commit bc6ea7bdf0

AUTHORS (1 line changed)
@@ -103,3 +103,4 @@ Christopher Krooss
 Ondřej Caletka
 Dinesh S
 Johan K. Jensen
+Yen Chi Hsuan
README.md (27 lines changed)
@@ -267,10 +267,22 @@ which means you can modify it, redistribute it or use it however you like.
                                      by extension for the extensions aac, m4a,
                                      mp3, mp4, ogg, wav, webm. You can also use
                                      the special names "best", "bestvideo",
-                                     "bestaudio", "worst". By default, youtube-
-                                     dl will pick the best quality. Use commas
-                                     to download multiple audio formats, such as
-                                     -f
+                                     "bestaudio", "worst". You can filter the
+                                     video results by putting a condition in
+                                     brackets, as in -f "best[height=720]" (or
+                                     -f "[filesize>10M]"). This works for
+                                     filesize, height, width, tbr, abr, and vbr
+                                     and the comparisons <, <=, >, >=, =, != .
+                                     Formats for which the value is not known
+                                     are excluded unless you put a question mark
+                                     (?) after the operator. You can combine
+                                     format filters, so -f "[height <=?
+                                     720][tbr>500]" selects up to 720p videos
+                                     (or videos where the height is not known)
+                                     with a bitrate of at least 500 KBit/s. By
+                                     default, youtube-dl will pick the best
+                                     quality. Use commas to download multiple
+                                     audio formats, such as -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -304,7 +316,8 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Authentication Options:
     -u, --username USERNAME          login with this account ID
-    -p, --password PASSWORD          account password
+    -p, --password PASSWORD          account password. If this option is left
+                                     out, youtube-dl will ask interactively.
    -2, --twofactor TWOFACTOR        two-factor auth code
     -n, --netrc                      use .netrc authentication data
     --video-password PASSWORD        video password (vimeo, smotri)
@@ -487,6 +500,10 @@ To make a different directory work - either for ffmpeg, or for youtube-dl, or fo
 From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
 
+### How do I put downloads into a specific folder?
+
+Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
 
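The same format string works through the embedding API, so the new filter syntax can also be exercised from Python. A minimal sketch, assuming a placeholder URL (reusing the FAQ's example host):

    import youtube_dl

    # Same filter the README documents for -f: up to 720p (or unknown height)
    # with a bitrate of at least 500 KBit/s.
    ydl_opts = {'format': 'best[height<=?720][tbr>500]'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['http://example.com/v/1234567'])  # placeholder URL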
test/test_YoutubeDL.py
@@ -281,6 +281,61 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], f1id)
 
+    def test_format_filtering(self):
+        formats = [
+            {'format_id': 'A', 'filesize': 500, 'width': 1000},
+            {'format_id': 'B', 'filesize': 1000, 'width': 500},
+            {'format_id': 'C', 'filesize': 1000, 'width': 400},
+            {'format_id': 'D', 'filesize': 2000, 'width': 600},
+            {'format_id': 'E', 'filesize': 3000},
+            {'format_id': 'F'},
+            {'format_id': 'G', 'filesize': 1000000},
+        ]
+        for f in formats:
+            f['url'] = 'http://_/'
+            f['ext'] = 'unknown'
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[filesize<3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'D')
+
+        ydl = YDL({'format': 'best[filesize<=3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'F')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'B')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'C')
+
+        ydl = YDL({'format': '[filesize>?1]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
+        ydl = YDL({'format': '[filesize<1M]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': '[filesize<1MiB]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',
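The last two assertions hinge on size-suffix parsing: the filter first tries the value as a plain integer, then as a file size, retrying with a trailing 'B' when the bare suffix is unknown. 'M' is decimal while 'MiB' is binary, so format G at exactly 1,000,000 bytes is rejected by <1M but accepted by <1MiB. A quick check, assuming youtube_dl is importable:

    from youtube_dl.utils import parse_filesize

    print(parse_filesize('1MB'))   # 1000000 (decimal megabyte)
    print(parse_filesize('1MiB'))  # 1048576 (binary mebibyte)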
test/test_all_urls.py
@@ -14,7 +14,6 @@ from test.helper import gettestcases
 from youtube_dl.extractor import (
     FacebookIE,
     gen_extractors,
-    TwitchIE,
     YoutubeIE,
 )
 
@@ -72,18 +71,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
-    def test_twitch_channelid_matching(self):
-        self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
-
-    def test_twitch_videoid_matching(self):
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
-
-    def test_twitch_chapterid_matching(self):
-        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
-
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
         assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
@@ -115,8 +102,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(':ythistory', ['youtube:history'])
         self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
         self.assertMatch(':tds', ['ComedyCentralShows'])
-        self.assertMatch(':colbertreport', ['ComedyCentralShows'])
-        self.assertMatch(':cr', ['ComedyCentralShows'])
 
     def test_vimeo_matching(self):
         self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
test/test_utils.py
@@ -28,6 +28,7 @@ from youtube_dl.utils import (
     fix_xml_ampersands,
     InAdvancePagedList,
     intlist_to_bytes,
+    is_html,
     js_to_json,
     limit_length,
     OnDemandPagedList,
@@ -417,5 +418,21 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
         self.assertTrue(age_restricted(18, 14))
         self.assertFalse(age_restricted(18, 18))
 
+    def test_is_html(self):
+        self.assertFalse(is_html(b'\x49\x44\x43<html'))
+        self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-8 with BOM
+            b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa'))
+        self.assertTrue(is_html(  # UTF-16-LE
+            b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00'
+        ))
+        self.assertTrue(is_html(  # UTF-16-BE
+            b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4'
+        ))
+        self.assertTrue(is_html(  # UTF-32-BE
+            b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4'))
+        self.assertTrue(is_html(  # UTF-32-LE
+            b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+
 if __name__ == '__main__':
     unittest.main()
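The assertions imply the shape of the new helper: strip a known byte-order mark, decode with the matching encoding, and test whether the first non-whitespace character is '<'. A sketch of that logic under those assumptions (a hypothetical re-implementation, not necessarily the exact youtube_dl.utils.is_html body):

    import re

    def sniff_html(first_bytes):
        # BOMs ordered so the UTF-32-LE mark is tried before its UTF-16-LE prefix.
        BOMS = [
            (b'\x00\x00\xfe\xff', 'utf-32-be'),
            (b'\xff\xfe\x00\x00', 'utf-32-le'),
            (b'\xff\xfe', 'utf-16-le'),
            (b'\xfe\xff', 'utf-16-be'),
            (b'\xef\xbb\xbf', 'utf-8'),
        ]
        encoding = 'utf-8'
        for bom, enc in BOMS:
            if first_bytes.startswith(bom):
                first_bytes = first_bytes[len(bom):]
                encoding = enc
                break
        return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace')) is not None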
youtube_dl/YoutubeDL.py
@@ -10,6 +10,7 @@ import io
 import itertools
 import json
 import locale
+import operator
 import os
 import platform
 import re
@@ -49,6 +50,7 @@ from .utils import (
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
+    parse_filesize,
     PostProcessingError,
     platform_name,
     preferredencoding,
@@ -768,7 +770,59 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    def _apply_format_filter(self, format_spec, available_formats):
+        " Returns a tuple of the remaining format_spec and filtered formats "
+
+        OPERATORS = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        operator_rex = re.compile(r'''(?x)\s*\[
+            (?P<key>width|height|tbr|abr|vbr|filesize)
+            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+            \]$
+            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+        m = operator_rex.search(format_spec)
+        if not m:
+            raise ValueError('Invalid format specification %r' % format_spec)
+
+        try:
+            comparison_value = int(m.group('value'))
+        except ValueError:
+            comparison_value = parse_filesize(m.group('value'))
+            if comparison_value is None:
+                comparison_value = parse_filesize(m.group('value') + 'B')
+            if comparison_value is None:
+                raise ValueError(
+                    'Invalid value %r in format specification %r' % (
+                        m.group('value'), format_spec))
+        op = OPERATORS[m.group('op')]
+
+        def _filter(f):
+            actual_value = f.get(m.group('key'))
+            if actual_value is None:
+                return m.group('none_inclusive')
+            return op(actual_value, comparison_value)
+        new_formats = [f for f in available_formats if _filter(f)]
+
+        new_format_spec = format_spec[:-len(m.group(0))]
+        if not new_format_spec:
+            new_format_spec = 'best'
+
+        return (new_format_spec, new_formats)
+
     def select_format(self, format_spec, available_formats):
+        while format_spec.endswith(']'):
+            format_spec, available_formats = self._apply_format_filter(
+                format_spec, available_formats)
+            if not available_formats:
+                return None
+
         if format_spec == 'best' or format_spec is None:
             return available_formats[-1]
         elif format_spec == 'worst':
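select_format now peels one trailing [...] clause per loop iteration and narrows the candidate list, so chained filters compose. The grammar can be poked at in isolation; a small sketch reusing the same regex (illustration only):

    import operator
    import re

    OPERATORS = {'<': operator.lt, '<=': operator.le, '>': operator.gt,
                 '>=': operator.ge, '=': operator.eq, '!=': operator.ne}
    operator_rex = re.compile(r'''(?x)\s*\[
        (?P<key>width|height|tbr|abr|vbr|filesize)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        \]$''' % '|'.join(map(re.escape, OPERATORS.keys())))

    m = operator_rex.search('best[height<=?720]')
    print(m.group('key'), m.group('op'), m.group('value'))  # height <= 720
    print(bool(m.group('none_inclusive')))                  # True: unknown heights pass
    print('best[height<=?720]'[:-len(m.group(0))])          # 'best' is what remains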
youtube_dl/extractor/__init__.py
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 from .abc import ABCIE
+from .abc7news import Abc7NewsIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adobetv import AdobeTVIE
@@ -175,6 +176,7 @@ from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
 from .groupon import GrouponIE
 from .hark import HarkIE
+from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
@@ -409,6 +411,7 @@ from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
 from .sunporno import SunPornoIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
@@ -430,6 +433,7 @@ from .telemb import TeleMBIE
 from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
+from .testtube import TestTubeIE
 from .tf1 import TF1IE
 from .theonion import TheOnionIE
 from .theplatform import ThePlatformIE
@@ -458,7 +462,14 @@ from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
 from .twentyfourvideo import TwentyFourVideoIE
-from .twitch import TwitchIE
+from .twitch import (
+    TwitchVideoIE,
+    TwitchChapterIE,
+    TwitchVodIE,
+    TwitchProfileIE,
+    TwitchPastBroadcastsIE,
+    TwitchStreamIE,
+)
 from .ubu import UbuIE
 from .udemy import (
     UdemyIE,
youtube_dl/extractor/abc7news.py (new file, 68 lines)
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class Abc7NewsIE(InfoExtractor):
+    _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+            'info_dict': {
+                'id': '472581',
+                'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+                'ext': 'mp4',
+                'title': 'East Bay museum celebrates history of synthesized music',
+                'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'timestamp': 1421123075,
+                'upload_date': '20150113',
+                'uploader': 'Jonathan Bloom',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://abc7news.com/472581',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        m3u8 = self._html_search_meta(
+            'contentURL', webpage, 'm3u8 url', fatal=True)
+
+        formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)
+
+        title = self._og_search_title(webpage).strip()
+        description = self._og_search_description(webpage).strip()
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
+            webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'rel="author">([^<]+)</a>',
+            webpage, 'uploader', default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'formats': formats,
+        }
youtube_dl/extractor/bandcamp.py
@@ -161,7 +161,8 @@ class BandcampAlbumIE(InfoExtractor):
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
-        title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
+        title = self._search_regex(
+            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
         return {
             '_type': 'playlist',
             'id': playlist_id,
youtube_dl/extractor/cnn.py
@@ -51,7 +51,7 @@ class CNNIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         path = mobj.group('path')
         page_title = mobj.group('title')
-        info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
+        info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
         info = self._download_xml(info_url, page_title)
 
         formats = []
@@ -143,13 +143,13 @@ class CNNArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
     _TEST = {
         'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
-        'md5': '275b326f85d80dff7592a9820f5dc887',
+        'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
         'info_dict': {
-            'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
+            'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
             'ext': 'mp4',
-            'title': 'Obama: We\'re not going to be intimidated',
-            'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
-            'upload_date': '20141220',
+            'title': 'Obama: Cyberattack not an act of war',
+            'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
+            'upload_date': '20141221',
         },
         'add_ie': ['CNN'],
     }
youtube_dl/extractor/comedycentral.py
@@ -34,12 +34,12 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
 
 class ComedyCentralShowsIE(MTVServicesInfoExtractor):
     IE_DESC = 'The Daily Show / The Colbert Report'
-    # urls can be abbreviations like :thedailyshow or :colbert
+    # urls can be abbreviations like :thedailyshow
     # urls for episodes like:
     # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
     # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
     # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
-    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
+    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
         |https?://(:www\.)?
             (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
             ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
youtube_dl/extractor/fourtube.py
@@ -7,10 +7,9 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
-    clean_html,
     parse_duration,
+    parse_iso8601,
     str_to_int,
-    unified_strdate,
 )
 
 
@@ -28,68 +27,81 @@ class FourTubeIE(InfoExtractor):
             'uploader': 'WCP Club',
             'uploader_id': 'wcp-club',
             'upload_date': '20131031',
+            'timestamp': 1383263892,
             'duration': 583,
+            'view_count': int,
+            'like_count': int,
+            'categories': list,
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage_url = 'http://www.4tube.com/videos/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
+        webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
+        title = self._html_search_meta('name', webpage)
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage))
+        thumbnail = self._html_search_meta('thumbnailUrl', webpage)
+        uploader_id = self._html_search_regex(
+            r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
+            webpage, 'uploader id')
+        uploader = self._html_search_regex(
+            r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
+            webpage, 'uploader')
 
-        playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist')
-        media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id')
-        sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',')
-        title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title')
-        thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False)
+        categories_html = self._search_regex(
+            r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
+            webpage, 'categories', fatal=False)
+        categories = None
+        if categories_html:
+            categories = [
+                c.strip() for c in re.findall(
+                    r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
 
-        uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage, 'uploader', fatal=False)
-        mobj = re.search(r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str)
-        (uploader, uploader_id) = (mobj.group('name'), mobj.group('id')) if mobj else (clean_html(uploader_str), None)
+        view_count = str_to_int(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
+            webpage, 'like count', fatal=False))
+        duration = parse_duration(self._html_search_meta('duration', webpage))
 
-        upload_date = None
-        view_count = None
-        duration = None
-        description = self._html_search_meta('description', webpage, 'description')
-        if description:
-            upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
-                                             fatal=False)
-            if upload_date:
-                upload_date = unified_strdate(upload_date)
-            view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
-            if view_count:
-                view_count = str_to_int(view_count)
-            duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False))
+        params_js = self._search_regex(
+            r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
+            webpage, 'initialization parameters'
+        )
+        params = self._parse_json('[%s]' % params_js, video_id)
+        media_id = params[0]
+        sources = ['%s' % p for p in params[2]]
 
-        token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
+        token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
+            media_id, '+'.join(sources))
         headers = {
             b'Content-Type': b'application/x-www-form-urlencoded',
             b'Origin': b'http://www.4tube.com',
         }
         token_req = compat_urllib_request.Request(token_url, b'{}', headers)
         tokens = self._download_json(token_req, video_id)
 
         formats = [{
             'url': tokens[format]['token'],
             'format_id': format + 'p',
             'resolution': format + 'p',
             'quality': int(format),
         } for format in sources]
 
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': title,
             'formats': formats,
-            'thumbnail': thumbnail_url,
+            'categories': categories,
+            'thumbnail': thumbnail,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
+            'like_count': like_count,
             'view_count': view_count,
             'duration': duration,
             'age_limit': 18,
-            'webpage_url': webpage_url,
         }
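The rewritten extractor reads the player's initialization arguments, which arrive as a bare JavaScript argument list, and parses them by wrapping the list in brackets so it becomes a JSON array (_parse_json is essentially json.loads plus error reporting). A sketch with made-up values:

    import json

    params_js = '12345, 0, [1080, 720, 480]'  # hypothetical captured argument list
    params = json.loads('[%s]' % params_js)   # -> [12345, 0, [1080, 720, 480]]
    media_id = params[0]
    sources = ['%s' % p for p in params[2]]
    print(media_id, sources)                  # 12345 ['1080', '720', '480']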
youtube_dl/extractor/gamestar.py
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -29,9 +27,7 @@ class GameStarIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
         og_title = self._og_search_title(webpage)
youtube_dl/extractor/generic.py
@@ -17,6 +17,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     HEADRequest,
+    is_html,
     orderedSet,
     parse_xml,
     smuggle_url,
@@ -647,7 +648,7 @@ class GenericIE(InfoExtractor):
         # Maybe it's a direct link to a video?
         # Be careful not to download the whole thing!
         first_bytes = full_response.read(512)
-        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+        if not is_html(first_bytes):
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
             upload_date = unified_strdate(
youtube_dl/extractor/hearthisat.py (new file, 117 lines)
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    HEADRequest,
+    str_to_int,
+    urlencode_postdata,
+    urlhandle_detect_ext,
+)
+
+
+class HearThisAtIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+    _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
+    _TEST = {
+        'url': 'https://hearthis.at/moofi/dr-kreep',
+        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
+        'info_dict': {
+            'id': '150939',
+            'ext': 'wav',
+            'title': 'Moofi - Dr. Kreep',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1421564134,
+            'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
+            'upload_date': '20150118',
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            'duration': 71,
+            'categories': ['Experimental'],
+        }
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
+
+        webpage = self._download_webpage(url, display_id)
+        track_id = self._search_regex(
+            r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
+
+        payload = urlencode_postdata({'tracks[]': track_id})
+        req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        track = self._download_json(req, track_id, 'Downloading playlist')[0]
+        title = '{artist:s} - {title:s}'.format(**track)
+
+        categories = None
+        if track.get('category'):
+            categories = [track['category']]
+
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
+        view_count = str_to_int(self._search_regex(
+            meta_span % 'plays_count', webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            meta_span % 'likes_count', webpage, 'like count', fatal=False))
+        comment_count = str_to_int(self._search_regex(
+            meta_span % 'comment_count', webpage, 'comment count', fatal=False))
+        duration = str_to_int(self._search_regex(
+            r'data-length="(\d+)', webpage, 'duration', fatal=False))
+        timestamp = str_to_int(self._search_regex(
+            r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
+
+        formats = []
+        mp3_url = self._search_regex(
+            r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
+            webpage, 'mp3 URL', fatal=False)
+        if mp3_url:
+            formats.append({
+                'format_id': 'mp3',
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'url': mp3_url,
+            })
+        download_path = self._search_regex(
+            r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
+            webpage, 'download URL', default=None)
+        if download_path:
+            download_url = compat_urlparse.urljoin(url, download_path)
+            ext_req = HEADRequest(download_url)
+            ext_handle = self._request_webpage(
+                ext_req, display_id, note='Determining extension')
+            ext = urlhandle_detect_ext(ext_handle)
+            formats.append({
+                'format_id': 'download',
+                'vcodec': 'none',
+                'ext': ext,
+                'url': download_url,
+                'preference': 2,  # Usually better quality
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': track_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'categories': categories,
+        }
youtube_dl/extractor/nbc.py
@@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_HTTPError,
 )
 from ..utils import (
     ExtractorError,
@@ -78,6 +79,16 @@ class NBCNewsIE(InfoExtractor):
             },
             'add_ie': ['ThePlatform'],
         },
+        {
+            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+            'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+            'info_dict': {
+                'id': 'Wjf9EDR3A_60',
+                'ext': 'mp4',
+                'title': 'FULL EPISODE: Family Business',
+                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -115,10 +126,19 @@ class NBCNewsIE(InfoExtractor):
             if not base_url:
                 continue
             playlist_url = base_url + '?form=MPXNBCNewsAPI'
-            all_videos = self._download_json(playlist_url, title)['videos']
 
             try:
-                info = next(v for v in all_videos if v['mpxId'] == mpxid)
+                all_videos = self._download_json(playlist_url, title)
+            except ExtractorError as ee:
+                if isinstance(ee.cause, compat_HTTPError):
+                    continue
+                raise
+
+            if not all_videos or 'videos' not in all_videos:
+                continue
+
+            try:
+                info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
                 break
             except StopIteration:
                 continue
youtube_dl/extractor/ndtv.py
@@ -27,9 +27,7 @@ class NDTVIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
         filename = self._search_regex(
(unidentified extractor module)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
youtube_dl/extractor/pornhub.py
@@ -10,6 +10,7 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
+    ExtractorError,
     str_to_int,
 )
 from ..aes import (
@@ -44,6 +45,15 @@ class PornHubIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
+        error_msg = self._html_search_regex(
+            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
+            webpage, 'error message', default=None)
+        if error_msg:
+            error_msg = re.sub(r'\s+', ' ', error_msg)
+            raise ExtractorError(
+                'PornHub said: %s' % error_msg,
+                expected=True, video_id=video_id)
+
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
youtube_dl/extractor/streetvoice.py (new file, 51 lines)
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import unified_strdate
+
+
+class StreetVoiceIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://streetvoice.com/skippylu/songs/94440/',
+        'md5': '15974627fc01a29e492c98593c2fd472',
+        'info_dict': {
+            'id': '94440',
+            'ext': 'mp3',
+            'filesize': 4167053,
+            'title': '輸',
+            'description': 'Crispy脆樂團 - 輸',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 260,
+            'upload_date': '20091018',
+            'uploader': 'Crispy脆樂團',
+            'uploader_id': '627810',
+        }
+    }, {
+        'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+
+        song = self._download_json(
+            'http://streetvoice.com/music/api/song/%s' % song_id, song_id)
+
+        title = song['name']
+        author = song['musician']['name']
+
+        return {
+            'id': song_id,
+            'url': song['file'],
+            'filesize': song.get('size'),
+            'title': title,
+            'description': '%s - %s' % (author, title),
+            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
+            'duration': song.get('length'),
+            'upload_date': unified_strdate(song.get('created_at')),
+            'uploader': author,
+            'uploader_id': compat_str(song['musician']['id']),
+        }
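This extractor is JSON-only: everything comes from the song API, and the API's created_at string is normalized to the YYYYMMDD form youtube-dl uses for upload_date. A quick illustration (the input layout is an assumption; unified_strdate tries a list of known datetime formats):

    from youtube_dl.utils import unified_strdate

    print(unified_strdate('2009-10-18 10:46:00'))  # '20091018'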
youtube_dl/extractor/testtube.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TestTubeIE(InfoExtractor):
+    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
+        'info_dict': {
+            'id': '60163',
+            'display_id': '5-weird-ways-plants-can-eat-animals',
+            'duration': 275,
+            'ext': 'mp4',
+            'title': '5 Weird Ways Plants Can Eat Animals',
+            'description': 'Why have some plants evolved to eat meat?',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'DNews',
+            'uploader_id': 'dnews',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
+            webpage, 'video ID')
+
+        all_info = self._download_json(
+            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
+            video_id)
+        info = all_info['items'][0]
+
+        formats = []
+        for vcodec, fdatas in info['media'].items():
+            for name, fdata in fdatas.items():
+                formats.append({
+                    'format_id': '%s-%s' % (vcodec, name),
+                    'url': fdata['url'],
+                    'vcodec': vcodec,
+                    'tbr': fdata.get('bitrate'),
+                })
+        self._sort_formats(formats)
+
+        duration = int_or_none(info.get('duration'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': info['title'],
+            'description': info.get('summary'),
+            'thumbnail': info.get('images', {}).get('large'),
+            'uploader': info.get('show', {}).get('name'),
+            'uploader_id': info.get('show', {}).get('slug'),
+            'duration': duration,
+            'formats': formats,
+        }
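The playlist API nests format data two levels deep, codec first and variant name second, which is why the extractor walks a double loop. A sketch of the expected shape (field values invented for illustration):

    media = {  # hypothetical payload mirroring info['media']
        'h264': {'medium': {'url': 'http://example.com/v-med.mp4', 'bitrate': 800}},
        'vp8': {'high': {'url': 'http://example.com/v-high.webm', 'bitrate': 1500}},
    }
    formats = [{
        'format_id': '%s-%s' % (vcodec, name),
        'url': fdata['url'],
        'vcodec': vcodec,
        'tbr': fdata.get('bitrate'),
    } for vcodec, fdatas in media.items() for name, fdata in fdatas.items()]
    print(sorted(f['format_id'] for f in formats))  # ['h264-medium', 'vp8-high']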
|
@ -9,17 +9,23 @@ from ..utils import ExtractorError
|
|||||||
class TinyPicIE(InfoExtractor):
|
class TinyPicIE(InfoExtractor):
|
||||||
IE_NAME = 'tinypic'
|
IE_NAME = 'tinypic'
|
||||||
IE_DESC = 'tinypic.com videos'
|
IE_DESC = 'tinypic.com videos'
|
||||||
_VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
|
_VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
|
{
|
||||||
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
|
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
|
||||||
'md5': '609b74432465364e72727ebc6203f044',
|
'md5': '609b74432465364e72727ebc6203f044',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6xw7tc',
|
'id': '6xw7tc',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'shadow phenomenon weird',
|
'title': 'shadow phenomenon weird',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://de.tinypic.com/player.php?v=dy90yh&s=8',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
}
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
youtube_dl/extractor/tvp.py
@@ -12,61 +12,59 @@ class TvpIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
+        'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
         'info_dict': {
             'id': '4278035',
             'ext': 'wmv',
             'title': 'Ogniem i mieczem, odc. 2',
-            'description': 'Bohun dowiaduje się o złamaniu przez kniahinię danego mu słowa i wyrusza do Rozłogów. Helenie w ostatniej chwili udaje się uciec dzięki pomocy Zagłoby.',
         },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
+        'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
         'info_dict': {
             'id': '194536',
             'ext': 'mp4',
             'title': 'Czas honoru, I seria – odc. 13',
-            # 'description': 'WŁADEK\nCzesław prosi Marię o dostarczenie Władkowi zarazki tyfusu. Jeśli zachoruje zostanie przewieziony do szpitala skąd łatwiej będzie go odbić. Czy matka zdecyduje się zarazić syna? Karol odwiedza Wandę przyznaje się, że ją oszukiwał, ale ostrzega też, że grozi jej aresztowanie i nalega, żeby wyjechała z Warszawy. Czy dziewczyna zdecyduje się znów oddalić od ukochanego? Rozpoczyna się akcja odbicia Władka.',
         },
     }, {
         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
+        'md5': 'c3b15ed1af288131115ff17a17c19dda',
         'info_dict': {
             'id': '17916176',
             'ext': 'mp4',
             'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': 'true',
-        },
     }, {
         'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
+        'md5': 'c3b15ed1af288131115ff17a17c19dda',
         'info_dict': {
             'id': '17834272',
             'ext': 'mp4',
             'title': 'Na sygnale, odc. 39',
-            'description': 'Ekipa Wiktora ratuje młodą matkę, która spadła ze schodów trzymając na rękach noworodka. Okazuje się, że dziewczyna jest surogatką, a biologiczni rodzice dziecka próbują zmusić ją do oddania synka…',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': 'true',
-        },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
 
-        title = self._og_search_title(webpage)
-        series = self._search_regex(
-            r'{name:\s*([\'"])SeriesTitle\1,\s*value:\s*\1(?P<series>.*?)\1},',
+        title = self._search_regex(
+            r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
+            webpage, 'title', group='title')
+        series_title = self._search_regex(
+            r'name\s*:\s*([\'"])SeriesTitle\1\s*,\s*value\s*:\s*\1(?P<series>.+?)\1',
             webpage, 'series', group='series', default=None)
-        if series is not None and series not in title:
-            title = '%s, %s' % (series, title)
-        description = self._og_search_description(webpage, default=None)
+        if series_title:
+            title = '%s, %s' % (series_title, title)
+
+        thumbnail = self._search_regex(
+            r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
 
         video_url = self._search_regex(
             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
-        if video_url is None:
+        if not video_url:
             video_url = self._download_json(
                 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
                 video_id)['video_url']
@@ -89,8 +87,7 @@ class TvpIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'description': description,
+            'thumbnail': thumbnail,
             'formats': formats,
         }
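The old code trusted og:title; the new regexes read the player configuration directly. The backreference \1 lets either quote character delimit the value, and group='title' selects the named group. A standalone check against a fabricated config snippet:

    import re

    webpage = "name : 'Title', value : 'Czas honoru, I seria – odc. 13'"  # fabricated
    m = re.search(
        r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', webpage)
    print(m.group('title'))  # Czas honoru, I seria – odc. 13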
@ -3,9 +3,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
@ -15,44 +17,12 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TwitchIE(InfoExtractor):
|
class TwitchBaseIE(InfoExtractor):
|
||||||
# TODO: One broadcast may be split into multiple videos. The key
|
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
|
||||||
# 'broadcast_id' is the same for all parts, and 'broadcast_part'
|
|
||||||
# starts at 1 and increases. Can we treat all parts as one video?
|
|
||||||
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
|
|
||||||
(?:
|
|
||||||
(?P<channelid>[^/]+)|
|
|
||||||
(?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
|
|
||||||
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
|
||||||
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
|
||||||
)
|
|
||||||
/?(?:\#.*)?$
|
|
||||||
"""
|
|
||||||
_PAGE_LIMIT = 100
|
|
||||||
_API_BASE = 'https://api.twitch.tv'
|
_API_BASE = 'https://api.twitch.tv'
|
||||||
|
_USHER_BASE = 'http://usher.twitch.tv'
|
||||||
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.twitch.tv/riotgames/b/577357806',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'a577357806',
|
|
||||||
'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 12,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.twitch.tv/acracingleague/c/5285812',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'c5285812',
|
|
||||||
'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 3,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.twitch.tv/vanillatv',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'vanillatv',
|
|
||||||
'title': 'VanillaTV',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 412,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
if not isinstance(response, dict):
|
if not isinstance(response, dict):
|
||||||
@@ -64,71 +34,10 @@ class TwitchIE(InfoExtractor):
                 expected=True)

     def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(TwitchIE, self)._download_json(url, video_id, note)
+        response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
         self._handle_error(response)
         return response

-    def _extract_media(self, item, item_id):
-        ITEMS = {
-            'a': 'video',
-            'v': 'vod',
-            'c': 'chapter',
-        }
-        info = self._extract_info(self._download_json(
-            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
-            'Downloading %s info JSON' % ITEMS[item]))
-
-        if item == 'v':
-            access_token = self._download_json(
-                '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
-                'Downloading %s access token' % ITEMS[item])
-            formats = self._extract_m3u8_formats(
-                'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
-                % (item_id, access_token['token'], access_token['sig']),
-                item_id, 'mp4')
-            info['formats'] = formats
-            return info
-
-        response = self._download_json(
-            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
-            'Downloading %s playlist JSON' % ITEMS[item])
-        entries = []
-        chunks = response['chunks']
-        qualities = list(chunks.keys())
-        for num, fragment in enumerate(zip(*chunks.values()), start=1):
-            formats = []
-            for fmt_num, fragment_fmt in enumerate(fragment):
-                format_id = qualities[fmt_num]
-                fmt = {
-                    'url': fragment_fmt['url'],
-                    'format_id': format_id,
-                    'quality': 1 if format_id == 'live' else 0,
-                }
-                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
-                if m:
-                    fmt['height'] = int(m.group('height'))
-                formats.append(fmt)
-            self._sort_formats(formats)
-            entry = dict(info)
-            entry['id'] = '%s_%d' % (entry['id'], num)
-            entry['title'] = '%s part %d' % (entry['title'], num)
-            entry['formats'] = formats
-            entries.append(entry)
-        return self.playlist_result(entries, info['id'], info['title'])
-
-    def _extract_info(self, info):
-        return {
-            'id': info['_id'],
-            'title': info['title'],
-            'description': info['description'],
-            'duration': info['length'],
-            'thumbnail': info['preview'],
-            'uploader': info['channel']['display_name'],
-            'uploader_id': info['channel']['name'],
-            'timestamp': parse_iso8601(info['recorded_at']),
-            'view_count': info['views'],
-        }
-
     def _real_initialize(self):
         self._login()
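The `_download_json` override is the error funnel for the whole module: every Kraken API response passes through `_handle_error` before any caller sees it, so the subclasses never check for API errors themselves. The shape of that wrapper pattern, as a standalone sketch:

    class Base(object):
        def _download_json(self, url):
            return {'error': 'Not Found'}  # stand-in for a real HTTP fetch

    class TwitchLike(Base):
        def _handle_error(self, response):
            if isinstance(response, dict) and response.get('error'):
                raise ValueError('API error: %s' % response['error'])

        def _download_json(self, url):
            response = super(TwitchLike, self)._download_json(url)
            self._handle_error(response)
            return response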
@@ -167,66 +76,139 @@ class TwitchIE(InfoExtractor):
             raise ExtractorError(
                 'Unable to login: %s' % m.group('msg').strip(), expected=True)

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj.group('chapterid'):
-            return self._extract_media('c', mobj.group('chapterid'))
-
-            """
-            webpage = self._download_webpage(url, chapter_id)
-            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
-            if not m:
-                raise ExtractorError('Cannot find archive of a chapter')
-            archive_id = m.group(1)
-
-            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(
-                api, chapter_id,
-                note='Downloading chapter information',
-                errnote='Chapter information download failed')
-            for a in doc.findall('.//archive'):
-                if archive_id == a.find('./id').text:
-                    break
-            else:
-                raise ExtractorError('Could not find chapter in chapter information')
-
-            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or 'flv'
-
-            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info = self._download_json(
-                chapter_api_url, 'c' + chapter_id,
-                note='Downloading chapter metadata',
-                errnote='Download of chapter metadata failed')
-
-            bracket_start = int(doc.find('.//bracket_start').text)
-            bracket_end = int(doc.find('.//bracket_end').text)
-
-            # TODO determine start (and probably fix up file)
-            # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += '?start=' + TODO:start_timestamp
-            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
-                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
-
-            info = {
-                'id': 'c' + chapter_id,
-                'url': video_url,
-                'ext': video_ext,
-                'title': chapter_info['title'],
-                'thumbnail': chapter_info['preview'],
-                'description': chapter_info['description'],
-                'uploader': chapter_info['channel']['display_name'],
-                'uploader_id': chapter_info['channel']['name'],
-            }
-            return info
-            """
-        elif mobj.group('videoid'):
-            return self._extract_media('a', mobj.group('videoid'))
-        elif mobj.group('vodid'):
-            return self._extract_media('v', mobj.group('vodid'))
-        elif mobj.group('channelid'):
-            channel_id = mobj.group('channelid')
+
+class TwitchItemBaseIE(TwitchBaseIE):
+    def _download_info(self, item, item_id):
+        return self._extract_info(self._download_json(
+            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
+            'Downloading %s info JSON' % self._ITEM_TYPE))
+
+    def _extract_media(self, item_id):
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
+        response = self._download_json(
+            '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id,
+            'Downloading %s playlist JSON' % self._ITEM_TYPE)
+        entries = []
+        chunks = response['chunks']
+        qualities = list(chunks.keys())
+        for num, fragment in enumerate(zip(*chunks.values()), start=1):
+            formats = []
+            for fmt_num, fragment_fmt in enumerate(fragment):
+                format_id = qualities[fmt_num]
+                fmt = {
+                    'url': fragment_fmt['url'],
+                    'format_id': format_id,
+                    'quality': 1 if format_id == 'live' else 0,
+                }
+                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+                if m:
+                    fmt['height'] = int(m.group('height'))
+                formats.append(fmt)
+            self._sort_formats(formats)
+            entry = dict(info)
+            entry['id'] = '%s_%d' % (entry['id'], num)
+            entry['title'] = '%s part %d' % (entry['title'], num)
+            entry['formats'] = formats
+            entries.append(entry)
+        return self.playlist_result(entries, info['id'], info['title'])
+
+    def _extract_info(self, info):
+        return {
+            'id': info['_id'],
+            'title': info['title'],
+            'description': info['description'],
+            'duration': info['length'],
+            'thumbnail': info['preview'],
+            'uploader': info['channel']['display_name'],
+            'uploader_id': info['channel']['name'],
+            'timestamp': parse_iso8601(info['recorded_at']),
+            'view_count': info['views'],
+        }
+
+    def _real_extract(self, url):
+        return self._extract_media(self._match_id(url))
+
+
+class TwitchVideoIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:video'
+    _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'video'
+    _ITEM_SHORTCUT = 'a'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/riotgames/b/577357806',
+        'info_dict': {
+            'id': 'a577357806',
+            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+        },
+        'playlist_mincount': 12,
+    }
+
+
+class TwitchChapterIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:chapter'
+    _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'chapter'
+    _ITEM_SHORTCUT = 'c'
+
+    _TESTS = [{
+        'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+        'info_dict': {
+            'id': 'c5285812',
+            'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+        },
+        'playlist_mincount': 3,
+    }, {
+        'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
+        'only_matching': True,
+    }]
+
+
+class TwitchVodIE(TwitchItemBaseIE):
+    IE_NAME = 'twitch:vod'
+    _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+    _ITEM_TYPE = 'vod'
+    _ITEM_SHORTCUT = 'v'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/ksptv/v/3622000',
+        'info_dict': {
+            'id': 'v3622000',
+            'ext': 'mp4',
+            'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 6951,
+            'timestamp': 1419028564,
+            'upload_date': '20141219',
+            'uploader': 'KSPTV',
+            'uploader_id': 'ksptv',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        item_id = self._match_id(url)
+        info = self._download_info(self._ITEM_SHORTCUT, item_id)
+        access_token = self._download_json(
+            '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
+            'Downloading %s access token' % self._ITEM_TYPE)
+        formats = self._extract_m3u8_formats(
+            '%s/vod/%s?nauth=%s&nauthsig=%s'
+            % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
+            item_id, 'mp4')
+        info['formats'] = formats
+        return info
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+    _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
+    _PAGE_LIMIT = 100
+
+    def _extract_playlist(self, channel_id):
         info = self._download_json(
             '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
             channel_id, 'Downloading channel info JSON')
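A note on the fragment regrouping in `_extract_media`: the API returns `chunks` keyed by quality, each a parallel list of video parts, and `zip(*chunks.values())` transposes that into one tuple of per-quality variants for each part. Toy illustration (real entries are dicts with a `url` key; exact dict ordering varies by Python version):

    chunks = {
        'live': ['part1-live', 'part2-live'],
        '240p': ['part1-240p', 'part2-240p'],
    }
    qualities = list(chunks.keys())
    for num, fragment in enumerate(zip(*chunks.values()), start=1):
        print(num, dict(zip(qualities, fragment)))
    # 1 {'live': 'part1-live', '240p': 'part1-240p'}
    # 2 {'live': 'part2-live', '240p': 'part2-240p'}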
@@ -236,12 +218,134 @@ class TwitchIE(InfoExtractor):
         limit = self._PAGE_LIMIT
         for counter in itertools.count(1):
             response = self._download_json(
-                '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
-                % (self._API_BASE, channel_id, offset, limit),
-                channel_id, 'Downloading channel videos JSON page %d' % counter)
+                self._PLAYLIST_URL % (channel_id, offset, limit),
+                channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
             videos = response['videos']
             if not videos:
                 break
-            entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
+            entries.extend([self.url_result(video['url']) for video in videos])
             offset += limit
         return self.playlist_result(entries, channel_id, channel_name)

+    def _real_extract(self, url):
+        return self._extract_playlist(self._match_id(url))
+
+
+class TwitchProfileIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:profile'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_TYPE = 'profile'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/vanillatv/profile',
+        'info_dict': {
+            'id': 'vanillatv',
+            'title': 'VanillaTV',
+        },
+        'playlist_mincount': 412,
+    }
+
+
+class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
+    IE_NAME = 'twitch:past_broadcasts'
+    _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true'
+    _PLAYLIST_TYPE = 'past broadcasts'
+
+    _TEST = {
+        'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
+        'info_dict': {
+            'id': 'spamfish',
+            'title': 'Spamfish',
+        },
+        'playlist_mincount': 54,
+    }
+
+
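The paging contract here: keep requesting `offset`/`limit` windows until the API returns an empty `videos` list. A standalone sketch of the same loop against a fake API:

    import itertools

    def fetch_page(offset, limit):
        all_videos = ['video%d' % i for i in range(250)]  # stand-in for the API
        return all_videos[offset:offset + limit]

    entries, offset, limit = [], 0, 100
    for counter in itertools.count(1):
        videos = fetch_page(offset, limit)
        if not videos:
            break
        entries.extend(videos)
        offset += limit
    print(len(entries))  # -> 250, fetched in three full pages plus one empty probe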
|
class TwitchStreamIE(TwitchBaseIE):
|
||||||
|
IE_NAME = 'twitch:stream'
|
||||||
|
_VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.twitch.tv/shroomztv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12772022048',
|
||||||
|
'display_id': 'shroomztv',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
|
||||||
|
'is_live': True,
|
||||||
|
'timestamp': 1421928037,
|
||||||
|
'upload_date': '20150122',
|
||||||
|
'uploader': 'ShroomzTV',
|
||||||
|
'uploader_id': 'shroomztv',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
|
stream = self._download_json(
|
||||||
|
'%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id,
|
||||||
|
'Downloading stream JSON').get('stream')
|
||||||
|
|
||||||
|
# Fallback on profile extraction if stream is offline
|
||||||
|
if not stream:
|
||||||
|
return self.url_result(
|
||||||
|
'http://www.twitch.tv/%s/profile' % channel_id,
|
||||||
|
'TwitchProfile', channel_id)
|
||||||
|
|
||||||
|
access_token = self._download_json(
|
||||||
|
'%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
|
||||||
|
'Downloading channel access token')
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'allow_source': 'true',
|
||||||
|
'p': random.randint(1000000, 10000000),
|
||||||
|
'player': 'twitchweb',
|
||||||
|
'segment_preference': '4',
|
||||||
|
'sig': access_token['sig'],
|
||||||
|
'token': access_token['token'],
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
'%s/api/channel/hls/%s.m3u8?%s'
|
||||||
|
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
||||||
|
channel_id, 'mp4')
|
||||||
|
|
||||||
|
view_count = stream.get('viewers')
|
||||||
|
timestamp = parse_iso8601(stream.get('created_at'))
|
||||||
|
|
||||||
|
channel = stream['channel']
|
||||||
|
title = self._live_title(channel.get('display_name') or channel.get('name'))
|
||||||
|
description = channel.get('status')
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for thumbnail_key, thumbnail_url in stream['preview'].items():
|
||||||
|
m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': compat_str(stream['_id']),
|
||||||
|
'display_id': channel_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'uploader': channel.get('display_name'),
|
||||||
|
'uploader_id': channel.get('name'),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': True,
|
||||||
|
}
|
||||||
|
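With the stream/VOD/profile split in place, any of the new extractors can be driven through youtube-dl's public Python API; the URL below is one of the test URLs from the diff, and what it returns naturally depends on what Twitch serves at the time:

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'skip_download': True})
    info = ydl.extract_info('http://www.twitch.tv/ksptv/v/3622000', download=False)
    print(info.get('title'))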
youtube_dl/extractor/videomega.py
@@ -1,12 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
 from ..utils import (
+    ExtractorError,
     remove_start,
 )

@@ -35,8 +38,11 @@ class VideoMegaIE(InfoExtractor):
         req.add_header('Referer', url)
         webpage = self._download_webpage(req, video_id)

-        escaped_data = self._search_regex(
-            r'unescape\("([^"]+)"\)', webpage, 'escaped data')
+        try:
+            escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
+        except IndexError:
+            raise ExtractorError('Unable to extract escaped data')

         playlist = compat_urllib_parse.unquote(escaped_data)

         thumbnail = self._search_regex(
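The behavioural change worth noting: `_search_regex` returns the first match, while `re.findall(...)[-1]` takes the last `unescape("...")` blob on the page, which is presumably where videomega now keeps the actual playlist. Quick illustration:

    import re

    webpage = 'unescape("decoy%20blob") ... unescape("real%20playlist")'
    print(re.findall(r'unescape\("([^"]+)"\)', webpage)[-1])  # -> real%20playlist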
youtube_dl/options.py
@@ -264,7 +264,7 @@ def parseOpts(overrideArguments=None):
     authentication.add_option(
         '-p', '--password',
         dest='password', metavar='PASSWORD',
-        help='account password')
+        help='account password. If this option is left out, youtube-dl will ask interactively.')
     authentication.add_option(
         '-2', '--twofactor',
         dest='twofactor', metavar='TWOFACTOR',
@@ -289,6 +289,17 @@ def parseOpts(overrideArguments=None):
         'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
         'You can also use the special names "best",'
         ' "bestvideo", "bestaudio", "worst". '
+        ' You can filter the video results by putting a condition in'
+        ' brackets, as in -f "best[height=720]"'
+        ' (or -f "[filesize>10M]"). '
+        ' This works for filesize, height, width, tbr, abr, and vbr'
+        ' and the comparisons <, <=, >, >=, =, != .'
+        ' Formats for which the value is not known are excluded unless you'
+        ' put a question mark (?) after the operator.'
+        ' You can combine format filters, so '
+        '-f "[height <=? 720][tbr>500]" '
+        'selects up to 720p videos (or videos where the height is not '
+        'known) with a bitrate of at least 500 KBit/s.'
         ' By default, youtube-dl will pick the best quality.'
         ' Use commas to download multiple audio formats, such as'
         ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
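The same filter strings work anywhere a format selector is accepted, including the `format` key of the Python API's options dict. A sketch (the URL is just an example):

    from youtube_dl import YoutubeDL

    # Up to 720p (or unknown height) with a total bitrate above 500 KBit/s
    ydl = YoutubeDL({'format': 'best[height<=?720][tbr>500]'})
    ydl.download(['http://www.twitch.tv/ksptv/v/3622000'])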
youtube_dl/postprocessor/ffmpeg.py
@@ -475,15 +475,21 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         filename = information['filepath']
         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]

-        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
+        opts = [
+            '-map', '0',
+            '-c', 'copy',
+            # Don't copy the existing subtitles, we may be running the
+            # postprocessor a second time
+            '-map', '-0:s',
+            '-c:s', 'mov_text',
+        ]
         for (i, lang) in enumerate(sub_langs):
-            opts.extend(['-map', '%d:0' % (i + 1), '-c:s:%d' % i, 'mov_text'])
+            opts.extend(['-map', '%d:0' % (i + 1)])
             lang_code = self._conver_lang_code(lang)
             if lang_code is not None:
                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
-        opts.extend(['-f', 'mp4'])

-        temp_filename = filename + '.temp'
+        temp_filename = prepend_extension(filename, 'temp')
         self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
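For a single English subtitle file, the new option list boils down to an ffmpeg argument sequence along these lines (illustrative; the real call goes through run_ffmpeg_multiple_files, which also supplies the input and output paths):

    opts = [
        '-map', '0', '-c', 'copy',   # keep every source stream, no re-encode
        '-map', '-0:s',              # but drop subtitles already in the source
        '-c:s', 'mov_text',          # encode embedded subs as mov_text
        '-map', '1:0',               # the first external subtitle input
        '-metadata:s:s:0', 'language=eng',
    ]
    print(' '.join(opts))

The old `-map 0:0 -map 0:1` hard-coded exactly one video and one audio stream; `-map 0` plus the negative `-map -0:s` keeps whatever streams the source has and stays idempotent when the postprocessor runs a second time.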
@@ -503,6 +509,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['artist'] = info['uploader']
         elif info.get('uploader_id') is not None:
             metadata['artist'] = info['uploader_id']
+        if info.get('description') is not None:
+            metadata['description'] = info['description']
+        if info.get('webpage_url') is not None:
+            metadata['comment'] = info['webpage_url']

         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
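Each collected key ends up as a `-metadata name=value` pair on the ffmpeg command line, so with the two new fields a download's description and source URL travel with the file. Roughly, assuming the usual key=value expansion (values here are illustrative):

    metadata = {
        'artist': 'KSPTV',
        'description': 'Squadcast, vacation edition',
        'comment': 'http://www.twitch.tv/ksptv/v/3622000',
    }
    options = []
    for name, value in sorted(metadata.items()):
        options.extend(['-metadata', '%s=%s' % (name, value)])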
youtube_dl/utils.py
@@ -612,7 +612,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):

     def http_request(self, req):
         for h, v in std_headers.items():
-            if h not in req.headers:
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # The dict keys are capitalized because of this bug by urllib
+            if h.capitalize() not in req.headers:
                 req.add_header(h, v)
         if 'Youtubedl-no-compression' in req.headers:
             if 'Accept-encoding' in req.headers:
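The bug being worked around, in isolation: urllib stores header names via str.capitalize(), so a membership test against the original mixed-case spelling silently fails and the header gets added twice.

    try:
        from urllib.request import Request  # Python 3
    except ImportError:
        from urllib2 import Request  # Python 2

    req = Request('http://example.com/')
    req.add_header('User-Agent', 'youtube-dl')
    print('User-Agent' in req.headers)               # False
    print('User-Agent'.capitalize() in req.headers)  # True ('User-agent')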
@@ -1277,7 +1279,7 @@ def parse_duration(s):
     s = s.strip()

     m = re.match(
-        r'''(?ix)T?
+        r'''(?ix)(?:P?T)?
         (?:
             (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
             (?P<only_hours>[0-9.]+)\s*(?:hours?)|
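Making the whole `PT` prefix optional (rather than just `T`) lets ISO 8601 durations fall through to the existing hour/minute/second branches. Assuming the h/m/s alternatives in the untouched remainder of the regex:

    from youtube_dl.utils import parse_duration

    print(parse_duration('3 minutes'))  # -> 180, via the only_mins branch above
    print(parse_duration('PT1H2M3S'))   # -> presumably 3723, now that PT is accepted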
@@ -1612,6 +1614,14 @@ def urlhandle_detect_ext(url_handle):
     except AttributeError:  # Python < 3
         getheader = url_handle.info().getheader

+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
     return getheader('Content-Type').split("/")[1]

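So a server that labels a download generically (say `Content-Type: application/octet-stream`) but names the attachment can still yield a sensible extension. The regex in isolation:

    import re

    cd = 'attachment; filename="clip.mp4"'
    m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
    print(m.group('filename').rpartition('.')[2])  # -> mp4, roughly what determine_ext returns here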
@@ -1623,3 +1633,23 @@ def age_restricted(content_limit, age_limit):
     if content_limit is None:
         return False  # Content available for everyone
     return age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes. """
+
+    BOMS = [
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    ]
+    for bom, enc in BOMS:
+        if first_bytes.startswith(bom):
+            s = first_bytes[len(bom):].decode(enc, 'replace')
+            break
+    else:
+        s = first_bytes.decode('utf-8', 'replace')
+
+    return re.match(r'^\s*<', s)
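The for/else here is the idiomatic "no BOM matched" fallback: decode as UTF-8 only when no byte-order mark claimed the prefix. After this commit the helper can be used directly:

    from youtube_dl.utils import is_html

    print(bool(is_html(b'\xef\xbb\xbf<!DOCTYPE html>')))  # True, UTF-8 BOM stripped first
    print(bool(is_html(b'%PDF-1.4')))                     # False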
youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.01.15.1'
+__version__ = '2015.01.23.1'