commit
e58df5787a
@ -81,6 +81,7 @@
|
|||||||
- **BokeCC**
|
- **BokeCC**
|
||||||
- **Bpb**: Bundeszentrale für politische Bildung
|
- **Bpb**: Bundeszentrale für politische Bildung
|
||||||
- **BR**: Bayerischer Rundfunk Mediathek
|
- **BR**: Bayerischer Rundfunk Mediathek
|
||||||
|
- **BravoTV**
|
||||||
- **Break**
|
- **Break**
|
||||||
- **brightcove:legacy**
|
- **brightcove:legacy**
|
||||||
- **brightcove:new**
|
- **brightcove:new**
|
||||||
@ -499,6 +500,7 @@
|
|||||||
- **Restudy**
|
- **Restudy**
|
||||||
- **ReverbNation**
|
- **ReverbNation**
|
||||||
- **Revision3**
|
- **Revision3**
|
||||||
|
- **RICE**
|
||||||
- **RingTV**
|
- **RingTV**
|
||||||
- **RottenTomatoes**
|
- **RottenTomatoes**
|
||||||
- **Roxwel**
|
- **Roxwel**
|
||||||
@ -617,6 +619,7 @@
|
|||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
- **ThePlatformFeed**
|
- **ThePlatformFeed**
|
||||||
- **TheSixtyOne**
|
- **TheSixtyOne**
|
||||||
|
- **TheStar**
|
||||||
- **ThisAmericanLife**
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **THVideo**
|
- **THVideo**
|
||||||
@ -650,6 +653,7 @@
|
|||||||
- **tv.dfb.de**
|
- **tv.dfb.de**
|
||||||
- **TV2**
|
- **TV2**
|
||||||
- **TV2Article**
|
- **TV2Article**
|
||||||
|
- **TV3**
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **TVC**
|
- **TVC**
|
||||||
- **TVCArticle**
|
- **TVCArticle**
|
||||||
|
@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
|
{'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
|
@ -28,6 +28,7 @@ from youtube_dl.utils import (
|
|||||||
encodeFilename,
|
encodeFilename,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
escape_url,
|
escape_url,
|
||||||
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
@ -77,6 +78,7 @@ from youtube_dl.utils import (
|
|||||||
cli_bool_option,
|
cli_bool_option,
|
||||||
)
|
)
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
|
compat_chr,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
@ -629,6 +631,44 @@ class TestUtil(unittest.TestCase):
|
|||||||
on = js_to_json('{"abc": "def",}')
|
on = js_to_json('{"abc": "def",}')
|
||||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||||
|
|
||||||
|
def test_extract_attributes(self):
|
||||||
|
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
|
||||||
|
self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'}) # XML
|
||||||
|
self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'}) # HTML 3.2
|
||||||
|
self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'}) # HTML 4.0
|
||||||
|
self.assertEqual(extract_attributes('<e x="&amp;foo">'), {'x': '&foo'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
|
||||||
|
self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
|
||||||
|
self.assertEqual(extract_attributes('<e x >'), {'x': None})
|
||||||
|
self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
|
||||||
|
self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
|
||||||
|
self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
|
||||||
|
self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
|
||||||
|
self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased
|
||||||
|
self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
|
||||||
|
self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
|
||||||
|
self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
|
||||||
|
self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
|
||||||
|
# "Narrow" Python builds don't support unicode code points outside BMP.
|
||||||
|
try:
|
||||||
|
compat_chr(0x10000)
|
||||||
|
supports_outside_bmp = True
|
||||||
|
except ValueError:
|
||||||
|
supports_outside_bmp = False
|
||||||
|
if supports_outside_bmp:
|
||||||
|
self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
|
||||||
|
|
||||||
def test_clean_html(self):
|
def test_clean_html(self):
|
||||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||||
@ -662,6 +702,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_count('1.000'), 1000)
|
self.assertEqual(parse_count('1.000'), 1000)
|
||||||
self.assertEqual(parse_count('1.1k'), 1100)
|
self.assertEqual(parse_count('1.1k'), 1100)
|
||||||
self.assertEqual(parse_count('1.1kk'), 1100000)
|
self.assertEqual(parse_count('1.1kk'), 1100000)
|
||||||
|
self.assertEqual(parse_count('1.1kk '), 1100000)
|
||||||
|
self.assertEqual(parse_count('1.1kk views'), 1100000)
|
||||||
|
|
||||||
def test_version_tuple(self):
|
def test_version_tuple(self):
|
||||||
self.assertEqual(version_tuple('1'), (1,))
|
self.assertEqual(version_tuple('1'), (1,))
|
||||||
|
@ -905,7 +905,7 @@ class YoutubeDL(object):
|
|||||||
'*=': lambda attr, value: value in attr,
|
'*=': lambda attr, value: value in attr,
|
||||||
}
|
}
|
||||||
str_operator_rex = re.compile(r'''(?x)
|
str_operator_rex = re.compile(r'''(?x)
|
||||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||||
\s*$
|
\s*$
|
||||||
|
@ -77,6 +77,11 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib import urlretrieve as compat_urlretrieve
|
from urllib import urlretrieve as compat_urlretrieve
|
||||||
|
|
||||||
|
try:
|
||||||
|
from html.parser import HTMLParser as compat_HTMLParser
|
||||||
|
except ImportError: # Python 2
|
||||||
|
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
from subprocess import DEVNULL
|
||||||
@ -251,6 +256,16 @@ else:
|
|||||||
el.text = el.text.decode('utf-8')
|
el.text = el.text.decode('utf-8')
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
if sys.version_info < (2, 7):
|
||||||
|
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||||
|
# .//node does not match if a node is a direct child of . !
|
||||||
|
def compat_xpath(xpath):
|
||||||
|
if isinstance(xpath, compat_str):
|
||||||
|
xpath = xpath.encode('ascii')
|
||||||
|
return xpath
|
||||||
|
else:
|
||||||
|
compat_xpath = lambda xpath: xpath
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
@ -543,6 +558,7 @@ else:
|
|||||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
'compat_HTMLParser',
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
@ -579,6 +595,7 @@ __all__ = [
|
|||||||
'compat_urlparse',
|
'compat_urlparse',
|
||||||
'compat_urlretrieve',
|
'compat_urlretrieve',
|
||||||
'compat_xml_parse_error',
|
'compat_xml_parse_error',
|
||||||
|
'compat_xpath',
|
||||||
'shlex_quote',
|
'shlex_quote',
|
||||||
'subprocess_check_output',
|
'subprocess_check_output',
|
||||||
'workaround_optparse_bug9161',
|
'workaround_optparse_bug9161',
|
||||||
|
@ -81,6 +81,7 @@ from .bloomberg import BloombergIE
|
|||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
from .br import BRIE
|
from .br import BRIE
|
||||||
|
from .bravotv import BravoTVIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
BrightcoveLegacyIE,
|
BrightcoveLegacyIE,
|
||||||
@ -135,6 +136,7 @@ from .collegerama import CollegeRamaIE
|
|||||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .comcarcoff import ComCarCoffIE
|
from .comcarcoff import ComCarCoffIE
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
|
from .commonprotocols import RtmpIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
@ -282,6 +284,7 @@ from .goshgay import GoshgayIE
|
|||||||
from .gputechconf import GPUTechConfIE
|
from .gputechconf import GPUTechConfIE
|
||||||
from .groupon import GrouponIE
|
from .groupon import GrouponIE
|
||||||
from .hark import HarkIE
|
from .hark import HarkIE
|
||||||
|
from .hbo import HBOIE
|
||||||
from .hearthisat import HearThisAtIE
|
from .hearthisat import HearThisAtIE
|
||||||
from .heise import HeiseIE
|
from .heise import HeiseIE
|
||||||
from .hellporno import HellPornoIE
|
from .hellporno import HellPornoIE
|
||||||
@ -784,6 +787,7 @@ from .tv2 import (
|
|||||||
TV2IE,
|
TV2IE,
|
||||||
TV2ArticleIE,
|
TV2ArticleIE,
|
||||||
)
|
)
|
||||||
|
from .tv3 import TV3IE
|
||||||
from .tv4 import TV4IE
|
from .tv4 import TV4IE
|
||||||
from .tvc import (
|
from .tvc import (
|
||||||
TVCIE,
|
TVCIE,
|
||||||
|
@ -18,7 +18,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
|
||||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||||
_NETRC_MACHINE = 'animeondemand'
|
_NETRC_MACHINE = 'animeondemand'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '161',
|
'id': '161',
|
||||||
@ -26,7 +26,15 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
|
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}
|
}, {
|
||||||
|
# Film wording is used instead of Episode
|
||||||
|
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Episodes without titles
|
||||||
|
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
@ -91,14 +99,22 @@ class AnimeOnDemandIE(InfoExtractor):
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
|
for num, episode_html in enumerate(re.findall(
|
||||||
m = re.search(
|
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||||
r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
|
episodebox_title = self._search_regex(
|
||||||
if not m:
|
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||||
|
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||||
|
episode_html, 'episodebox title', default=None, group='title')
|
||||||
|
if not episodebox_title:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
episode_number = int(m.group('number'))
|
episode_number = int(self._search_regex(
|
||||||
episode_title = m.group('title')
|
r'(?:Episode|Film)\s*(\d+)',
|
||||||
|
episodebox_title, 'episode number', default=num))
|
||||||
|
episode_title = self._search_regex(
|
||||||
|
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||||
|
episodebox_title, 'episode title', default=None)
|
||||||
|
|
||||||
video_id = 'episode-%d' % episode_number
|
video_id = 'episode-%d' % episode_number
|
||||||
|
|
||||||
common_info = {
|
common_info = {
|
||||||
|
28
youtube_dl/extractor/bravotv.py
Normal file
28
youtube_dl/extractor/bravotv.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
|
class BravoTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
||||||
|
'md5': 'd60cdf68904e854fac669bd26cccf801',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'LitrBdX64qLn',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Last Chance Kitchen Returns',
|
||||||
|
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
|
||||||
|
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
|
||||||
|
return self.url_result(smuggle_url(
|
||||||
|
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
|
||||||
|
{'force_smil_url': True}), 'ThePlatform', release_pid)
|
@ -9,7 +9,6 @@ from ..compat import (
|
|||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
@ -24,16 +23,16 @@ from ..utils import (
|
|||||||
js_to_json,
|
js_to_json,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveLegacyIE(InfoExtractor):
|
class BrightcoveLegacyIE(InfoExtractor):
|
||||||
IE_NAME = 'brightcove:legacy'
|
IE_NAME = 'brightcove:legacy'
|
||||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -156,7 +155,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
# Not all pages define this value
|
# Not all pages define this value
|
||||||
if playerKey is not None:
|
if playerKey is not None:
|
||||||
params['playerKey'] = playerKey
|
params['playerKey'] = playerKey
|
||||||
# The three fields hold the id of the video
|
# These fields hold the id of the video
|
||||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
params['@videoPlayer'] = videoPlayer
|
params['@videoPlayer'] = videoPlayer
|
||||||
@ -185,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_brightcove_url(cls, params):
|
def _make_brightcove_url(cls, params):
|
||||||
data = compat_urllib_parse.urlencode(params)
|
return update_url_query(cls._FEDERATED_URL, params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_brightcove_url(cls, webpage):
|
def _extract_brightcove_url(cls, webpage):
|
||||||
@ -240,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
# We set the original url as the default 'Referer' header
|
# We set the original url as the default 'Referer' header
|
||||||
referer = smuggled_data.get('Referer', url)
|
referer = smuggled_data.get('Referer', url)
|
||||||
return self._get_video_info(
|
return self._get_video_info(
|
||||||
videoPlayer[0], query_str, query, referer=referer)
|
videoPlayer[0], query, referer=referer)
|
||||||
elif 'playerKey' in query:
|
elif 'playerKey' in query:
|
||||||
player_key = query['playerKey']
|
player_key = query['playerKey']
|
||||||
return self._get_playlist_info(player_key[0])
|
return self._get_playlist_info(player_key[0])
|
||||||
@ -249,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
def _get_video_info(self, video_id, query_str, query, referer=None):
|
def _get_video_info(self, video_id, query, referer=None):
|
||||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
headers = {}
|
||||||
req = sanitized_Request(request_url)
|
|
||||||
linkBase = query.get('linkBaseURL')
|
linkBase = query.get('linkBaseURL')
|
||||||
if linkBase is not None:
|
if linkBase is not None:
|
||||||
referer = linkBase[0]
|
referer = linkBase[0]
|
||||||
if referer is not None:
|
if referer is not None:
|
||||||
req.add_header('Referer', referer)
|
headers['Referer'] = referer
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
error_msg = self._html_search_regex(
|
||||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||||
@ -415,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for iframe embeds [1]
|
# Look for iframe embeds [1]
|
||||||
for _, url in re.findall(
|
for _, url in re.findall(
|
||||||
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||||
entries.append(url)
|
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||||
|
|
||||||
# Look for embed_in_page embeds [2]
|
# Look for embed_in_page embeds [2]
|
||||||
for video_id, account_id, player_id, embed in re.findall(
|
for video_id, account_id, player_id, embed in re.findall(
|
||||||
@ -459,12 +456,11 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||||
webpage, 'policy key', group='pk')
|
webpage, 'policy key', group='pk')
|
||||||
|
|
||||||
req = sanitized_Request(
|
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||||
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
|
|
||||||
% (account_id, video_id),
|
|
||||||
headers={'Accept': 'application/json;pk=%s' % policy_key})
|
|
||||||
try:
|
try:
|
||||||
json_data = self._download_json(req, video_id)
|
json_data = self._download_json(api_url, video_id, headers={
|
||||||
|
'Accept': 'application/json;pk=%s' % policy_key
|
||||||
|
})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
||||||
@ -482,8 +478,7 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
src, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
elif source_type == 'application/dash+xml':
|
elif source_type == 'application/dash+xml':
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
|
@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
|
|||||||
pid = item.get('media' + format_id)
|
pid = item.get('media' + format_id)
|
||||||
if not pid:
|
if not pid:
|
||||||
continue
|
continue
|
||||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
|
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||||
formats.extend(tp_formats)
|
formats.extend(tp_formats)
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
|
@ -60,7 +60,7 @@ class CNETIE(ThePlatformIE):
|
|||||||
for (fkey, vid) in vdata['files'].items():
|
for (fkey, vid) in vdata['files'].items():
|
||||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||||
continue
|
continue
|
||||||
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
|
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
|
||||||
if fkey == 'hds':
|
if fkey == 'hds':
|
||||||
release_url += '&manifest=f4m'
|
release_url += '&manifest=f4m'
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
|
||||||
|
36
youtube_dl/extractor/commonprotocols.py
Normal file
36
youtube_dl/extractor/commonprotocols.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import url_basename
|
||||||
|
|
||||||
|
|
||||||
|
class RtmpIE(InfoExtractor):
|
||||||
|
IE_DESC = False # Do not list
|
||||||
|
_VALID_URL = r'(?i)rtmp[est]?://.+'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||||
|
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': [{
|
||||||
|
'url': url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': compat_urlparse.urlparse(url).scheme,
|
||||||
|
}],
|
||||||
|
}
|
@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
def _download_webpage(self, url_or_request, *args, **kwargs):
|
||||||
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||||
else sanitized_Request(url_or_request))
|
else sanitized_Request(url_or_request))
|
||||||
# Accept-Language must be set explicitly to accept any language to avoid issues
|
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||||
@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
||||||
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
||||||
request.add_header('Accept-Language', '*')
|
request.add_header('Accept-Language', '*')
|
||||||
return super(CrunchyrollBaseIE, self)._download_webpage(
|
return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
|
||||||
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _add_skip_wall(url):
|
def _add_skip_wall(url):
|
||||||
|
@ -239,6 +239,35 @@ class GenericIE(InfoExtractor):
|
|||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
|
||||||
|
{
|
||||||
|
'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'content',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'content',
|
||||||
|
'formats': 'mincount:8',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 downloads
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# m3u8 served with Content-Type: text/plain
|
||||||
|
{
|
||||||
|
'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'index',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'index',
|
||||||
|
'upload_date': '20140720',
|
||||||
|
'formats': 'mincount:11',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 downloads
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
@ -1245,14 +1274,13 @@ class GenericIE(InfoExtractor):
|
|||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
|
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = head_response.headers.get('Content-Type', '')
|
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
upload_date = unified_strdate(
|
|
||||||
head_response.headers.get('Last-Modified'))
|
|
||||||
format_id = m.group('format_id')
|
format_id = m.group('format_id')
|
||||||
if format_id.endswith('mpegurl'):
|
if format_id.endswith('mpegurl'):
|
||||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
@ -1264,11 +1292,8 @@ class GenericIE(InfoExtractor):
|
|||||||
'url': url,
|
'url': url,
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
}]
|
}]
|
||||||
info_dict.update({
|
info_dict['direct'] = True
|
||||||
'direct': True,
|
info_dict['formats'] = formats
|
||||||
'formats': formats,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
})
|
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
@ -1289,18 +1314,21 @@ class GenericIE(InfoExtractor):
|
|||||||
request.add_header('Accept-Encoding', '*')
|
request.add_header('Accept-Encoding', '*')
|
||||||
full_response = self._request_webpage(request, video_id)
|
full_response = self._request_webpage(request, video_id)
|
||||||
|
|
||||||
|
first_bytes = full_response.read(512)
|
||||||
|
|
||||||
|
# Is it an M3U playlist?
|
||||||
|
if first_bytes.startswith(b'#EXTM3U'):
|
||||||
|
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
|
return info_dict
|
||||||
|
|
||||||
# Maybe it's a direct link to a video?
|
# Maybe it's a direct link to a video?
|
||||||
# Be careful not to download the whole thing!
|
# Be careful not to download the whole thing!
|
||||||
first_bytes = full_response.read(512)
|
|
||||||
if not is_html(first_bytes):
|
if not is_html(first_bytes):
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'URL could be a direct video link, returning it as such.')
|
'URL could be a direct video link, returning it as such.')
|
||||||
upload_date = unified_strdate(
|
|
||||||
head_response.headers.get('Last-Modified'))
|
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'url': url,
|
'url': url,
|
||||||
'upload_date': upload_date,
|
|
||||||
})
|
})
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
122
youtube_dl/extractor/hbo.py
Normal file
122
youtube_dl/extractor/hbo.py
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
xpath_element,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HBOIE(InfoExtractor):
    # Extractor for hbo.com clip pages of the form
    # http://www.hbo.com/video/video.html?...vid=<numeric id>
    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
        'md5': '1c33253f0c7782142c993c0ba62a8753',
        'info_dict': {
            'id': '1437839',
            'ext': 'mp4',
            'title': 'Ep. 64 Clip: Encryption',
        }
    }
    # Frame dimensions per source, looked up either by the `width` attribute
    # of a <size> element or by the tag name of any other source element.
    # NOTE(review): the '1920' and '640' keys map to different actual widths
    # (1280 and 768); presumably the feed labels are nominal - confirm.
    _FORMATS_INFO = {
        '1920': {
            'width': 1280,
            'height': 720,
        },
        '640': {
            'width': 768,
            'height': 432,
        },
        'highwifi': {
            'width': 640,
            'height': 360,
        },
        'high3g': {
            'width': 640,
            'height': 360,
        },
        'medwifi': {
            'width': 400,
            'height': 224,
        },
        'med3g': {
            'width': 400,
            'height': 224,
        },
    }

    def _real_extract(self, url):
        """Download the per-video XML descriptor and build the info dict.

        Returns a dict with ``id``, ``title``, ``duration``, ``formats`` and
        ``thumbnails``. The title and the ``videos`` element are requested
        as fatal lookups, so their absence aborts extraction.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml(
            'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
        title = xpath_text(video_data, 'title', 'title', True)

        formats = []
        for source in xpath_element(video_data, 'videos', 'sources', True):
            if source.tag == 'size':
                # <size> elements carry the media location in a nested <path>.
                path = xpath_text(source, './/path')
                if not path:
                    continue
                width = source.attrib.get('width')
                format_info = self._FORMATS_INFO.get(width, {})
                height = format_info.get('height')
                fmt = {
                    'url': path,
                    'format_id': 'http%s' % ('-%dp' % height if height else ''),
                    'width': format_info.get('width'),
                    'height': height,
                }
                # RTMP locations are split into connection URL, app and
                # play path, and the format is relabelled as rtmp/flv.
                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
                if rtmp:
                    fmt.update({
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'ext': 'flv',
                        'format_id': fmt['format_id'].replace('http', 'rtmp'),
                    })
                formats.append(fmt)
            else:
                # Every other element holds its URL directly as text.
                video_url = source.text
                if not video_url:
                    continue
                if source.tag == 'tarball':
                    # The HLS master playlist URL is derived from the
                    # tarball URL; a failed manifest download is non-fatal.
                    formats.extend(self._extract_m3u8_formats(
                        video_url.replace('.tar', '/base_index_w8.m3u8'),
                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    format_info = self._FORMATS_INFO.get(source.tag, {})
                    formats.append({
                        'format_id': 'http-%s' % source.tag,
                        'url': video_url,
                        'width': format_info.get('width'),
                        'height': format_info.get('height'),
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        thumbnails = []
        card_sizes = xpath_element(video_data, 'titleCardSizes')
        if card_sizes is not None:
            for size in card_sizes:
                path = xpath_text(size, 'path')
                if not path:
                    continue
                width = int_or_none(size.get('width'))
                thumbnails.append({
                    'id': width,
                    'url': path,
                    'width': width,
                })

        return {
            'id': video_id,
            'title': title,
            # parse_duration works on the element's text, not on the Element
            # itself, so the value has to be fetched with xpath_text.
            'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@ -48,7 +48,7 @@ class NationalGeographicIE(InfoExtractor):
|
|||||||
theplatform_id = url_basename(content.attrib.get('url'))
|
theplatform_id = url_basename(content.attrib.get('url'))
|
||||||
|
|
||||||
return self.url_result(smuggle_url(
|
return self.url_result(smuggle_url(
|
||||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||||
# For some reason, the normal links don't work and we must force
|
# For some reason, the normal links don't work and we must force
|
||||||
# the use of f4m
|
# the use of f4m
|
||||||
{'force_smil_url': True}))
|
{'force_smil_url': True}))
|
||||||
|
@ -3,13 +3,16 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_HTTPError
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
update_url_query,
|
||||||
|
int_or_none,
|
||||||
|
HEADRequest,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -131,10 +134,10 @@ class NBCSportsIE(InfoExtractor):
|
|||||||
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(InfoExtractor):
|
class NBCNewsIE(ThePlatformIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||||
(?:video/.+?/(?P<id>\d+)|
|
(?:video/.+?/(?P<id>\d+)|
|
||||||
(?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
|
([^/]+/)*(?P<display_id>[^/?]+))
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -149,15 +152,14 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
|
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
||||||
'md5': 'b2421750c9f260783721d898f4c42063',
|
'md5': 'af1adfa51312291a017720403826bb64',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'I1wpAI_zmhsQ',
|
'id': '269389891880',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
|
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
|
||||||
@ -168,17 +170,29 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
'title': 'FULL EPISODE: Family Business',
|
'title': 'FULL EPISODE: Family Business',
|
||||||
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
||||||
},
|
},
|
||||||
|
'skip': 'This page is unavailable.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||||
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
|
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sekXqyTVnmN3',
|
'id': '394064451844',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
||||||
|
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '529953347624',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'',
|
||||||
|
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['http-6000 is not available']
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -202,49 +216,80 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# "feature" and "nightly-news" pages use theplatform.com
|
# "feature" and "nightly-news" pages use theplatform.com
|
||||||
title = mobj.group('title')
|
display_id = mobj.group('display_id')
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
info = None
|
||||||
bootstrap_json = self._search_regex(
|
bootstrap_json = self._search_regex(
|
||||||
r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
||||||
webpage, 'bootstrap json', flags=re.MULTILINE)
|
webpage, 'bootstrap json', default=None)
|
||||||
bootstrap = self._parse_json(bootstrap_json, video_id)
|
if bootstrap_json:
|
||||||
|
bootstrap = self._parse_json(bootstrap_json, display_id)
|
||||||
info = bootstrap['results'][0]['video']
|
info = bootstrap['results'][0]['video']
|
||||||
mpxid = info['mpxId']
|
else:
|
||||||
|
player_instance_json = self._search_regex(
|
||||||
|
r'videoObj\s*:\s*({.+})', webpage, 'player instance')
|
||||||
|
info = self._parse_json(player_instance_json, display_id)
|
||||||
|
video_id = info['mpxId']
|
||||||
|
title = info['title']
|
||||||
|
|
||||||
base_urls = [
|
subtitles = {}
|
||||||
info['fallbackPlaylistUrl'],
|
caption_links = info.get('captionLinks')
|
||||||
info['associatedPlaylistUrl'],
|
if caption_links:
|
||||||
]
|
for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')):
|
||||||
|
sub_url = caption_links.get(sub_key)
|
||||||
|
if sub_url:
|
||||||
|
subtitles.setdefault('en', []).append({
|
||||||
|
'url': sub_url,
|
||||||
|
'ext': sub_ext,
|
||||||
|
})
|
||||||
|
|
||||||
for base_url in base_urls:
|
formats = []
|
||||||
if not base_url:
|
for video_asset in info['videoAssets']:
|
||||||
|
video_url = video_asset.get('publicUrl')
|
||||||
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
playlist_url = base_url + '?form=MPXNBCNewsAPI'
|
container = video_asset.get('format')
|
||||||
|
asset_type = video_asset.get('assetType') or ''
|
||||||
try:
|
if container == 'ISM' or asset_type == 'FireTV-Once':
|
||||||
all_videos = self._download_json(playlist_url, title)
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if isinstance(ee.cause, compat_HTTPError):
|
|
||||||
continue
|
continue
|
||||||
raise
|
elif asset_type == 'OnceURL':
|
||||||
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||||
if not all_videos or 'videos' not in all_videos:
|
video_url, video_id)
|
||||||
continue
|
formats.extend(tp_formats)
|
||||||
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
try:
|
else:
|
||||||
info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)
|
tbr = int_or_none(video_asset.get('bitRate'), 1000)
|
||||||
break
|
format_id = 'http%s' % ('-%d' % tbr if tbr else '')
|
||||||
except StopIteration:
|
video_url = update_url_query(
|
||||||
continue
|
video_url, {'format': 'redirect'})
|
||||||
|
# resolve the url so that we can check availability and detect the correct extension
|
||||||
if info is None:
|
head = self._request_webpage(
|
||||||
raise ExtractorError('Could not find video in playlists')
|
HEADRequest(video_url), video_id,
|
||||||
|
'Checking %s url' % format_id,
|
||||||
|
'%s is not available' % format_id,
|
||||||
|
fatal=False)
|
||||||
|
if head:
|
||||||
|
video_url = head.geturl()
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': video_url,
|
||||||
|
'width': int_or_none(video_asset.get('width')),
|
||||||
|
'height': int_or_none(video_asset.get('height')),
|
||||||
|
'tbr': tbr,
|
||||||
|
'container': video_asset.get('format'),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url',
|
'id': video_id,
|
||||||
# We get the best quality video
|
'title': title,
|
||||||
'url': info['videoAssets'][-1]['publicUrl'],
|
'description': info.get('description'),
|
||||||
'ie_key': 'ThePlatform',
|
'thumbnail': info.get('description'),
|
||||||
|
'thumbnail': info.get('thumbnail'),
|
||||||
|
'duration': int_or_none(info.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(info.get('pubDate')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,7 +2,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import (
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_xpath,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
@ -47,7 +50,7 @@ class NozIE(InfoExtractor):
|
|||||||
duration = int_or_none(xpath_text(
|
duration = int_or_none(xpath_text(
|
||||||
doc, './/article/movie/file/duration'))
|
doc, './/article/movie/file/duration'))
|
||||||
formats = []
|
formats = []
|
||||||
for qnode in doc.findall('.//article/movie/file/qualities/qual'):
|
for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
|
||||||
http_url_ele = find_xpath_attr(
|
http_url_ele = find_xpath_attr(
|
||||||
qnode, './html_urls/video_url', 'format', 'video/mp4')
|
qnode, './html_urls/video_url', 'format', 'video/mp4')
|
||||||
http_url = http_url_ele.text if http_url_ele is not None else None
|
http_url = http_url_ele.text if http_url_ele is not None else None
|
||||||
|
38
youtube_dl/extractor/once.py
Normal file
38
youtube_dl/extractor/once.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class OnceIE(InfoExtractor):
    # Helper extractor for once.unicornmedia.com URLs. It only provides
    # _extract_once_formats for subclasses; no _real_extract is defined here.
    _VALID_URL = r'https?://once\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
    ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
    PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'

    def _extract_once_formats(self, url):
        """Return HLS formats for a Once URL plus matching progressive ones.

        ``url`` must match ``_VALID_URL``; callers are expected to check
        ``OnceIE.suitable(url)`` first, since a non-matching URL makes
        ``re.match`` return None and the ``.groups()`` call fail.
        """
        domain_id, application_id, media_item_id = re.match(
            OnceIE._VALID_URL, url).groups()
        formats = self._extract_m3u8_formats(
            self.ADAPTIVE_URL_TEMPLATE % (
                domain_id, application_id, media_item_id),
            media_item_id, 'mp4', m3u8_id='hls', fatal=False)
        progressive_formats = []
        for adaptive_format in formats:
            # Each HLS variant URL embeds a rendition id that can be plugged
            # into the progressive (plain HTTP mp4) URL template.
            rendition_id = self._search_regex(
                r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
                adaptive_format['url'], 'rendition id', default=None)
            if rendition_id:
                progressive_format = adaptive_format.copy()
                progressive_format.update({
                    'url': self.PROGRESSIVE_URL_TEMPLATE % (
                        domain_id, application_id, rendition_id, media_item_id),
                    'format_id': adaptive_format['format_id'].replace(
                        'hls', 'http'),
                    'protocol': 'http',
                })
                progressive_formats.append(progressive_format)
        # The progressive URLs are derived rather than advertised, so they
        # are passed through _check_formats before being offered.
        self._check_formats(progressive_formats, media_item_id)
        formats.extend(progressive_formats)
        return formats
|
@ -2,6 +2,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SBSIE(InfoExtractor):
|
class SBSIE(InfoExtractor):
|
||||||
@ -31,21 +35,28 @@ class SBSIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
player_params = self._download_json(
|
||||||
|
'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
error = player_params.get('error')
|
||||||
'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
|
if error:
|
||||||
|
error_message = 'Sorry, The video you are looking for does not exist.'
|
||||||
player_params = self._parse_json(
|
video_data = error.get('results') or {}
|
||||||
self._search_regex(
|
error_code = error.get('errorCode')
|
||||||
r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
|
if error_code == 'ComingSoon':
|
||||||
video_id)
|
error_message = '%s is not yet available.' % video_data.get('title', '')
|
||||||
|
elif error_code in ('Forbidden', 'intranetAccessOnly'):
|
||||||
|
error_message = 'Sorry, This video cannot be accessed via this website'
|
||||||
|
elif error_code == 'Expired':
|
||||||
|
error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||||
|
|
||||||
urls = player_params['releaseUrls']
|
urls = player_params['releaseUrls']
|
||||||
theplatform_url = (urls.get('progressive') or urls.get('standard') or
|
theplatform_url = (urls.get('progressive') or urls.get('html') or
|
||||||
urls.get('html') or player_params['relatedItemsURL'])
|
urls.get('standard') or player_params['relatedItemsURL'])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': theplatform_url,
|
'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
|
||||||
}
|
}
|
||||||
|
@ -8,13 +8,12 @@ import binascii
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .once import OnceIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -29,26 +28,27 @@ default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
|||||||
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||||
|
|
||||||
|
|
||||||
class ThePlatformBaseIE(InfoExtractor):
|
class ThePlatformBaseIE(OnceIE):
|
||||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||||
meta = self._download_xml(smil_url, video_id, note=note)
|
meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
|
||||||
error_element = find_xpath_attr(
|
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
||||||
meta, _x('.//smil:ref'), 'src',
|
if error_element is not None and error_element.attrib['src'].startswith(
|
||||||
'http://link.theplatform.com/s/errorFiles/Unavailable.mp4')
|
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
|
||||||
if error_element is not None:
|
|
||||||
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
||||||
|
|
||||||
formats = self._parse_smil_formats(
|
smil_formats = self._parse_smil_formats(
|
||||||
meta, smil_url, video_id, namespace=default_ns,
|
meta, smil_url, video_id, namespace=default_ns,
|
||||||
# the parameters are from syfy.com, other sites may use others,
|
# the parameters are from syfy.com, other sites may use others,
|
||||||
# they also work for nbc.com
|
# they also work for nbc.com
|
||||||
f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
|
f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
|
||||||
transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
|
transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
|
||||||
|
|
||||||
for _format in formats:
|
formats = []
|
||||||
ext = determine_ext(_format['url'])
|
for _format in smil_formats:
|
||||||
if ext == 'once':
|
if OnceIE.suitable(_format['url']):
|
||||||
_format['ext'] = 'mp4'
|
formats.extend(self._extract_once_formats(_format['url']))
|
||||||
|
else:
|
||||||
|
formats.append(_format)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@ -125,7 +125,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
|
'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
|
||||||
'md5': '734f3790fb5fc4903da391beeebc4836',
|
'md5': 'fb96bb3d85118930a5b055783a3bd992',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tdy_or_siri_150701',
|
'id': 'tdy_or_siri_150701',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -135,7 +135,6 @@ class ThePlatformIE(ThePlatformBaseIE):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1435752600,
|
'timestamp': 1435752600,
|
||||||
'upload_date': '20150701',
|
'upload_date': '20150701',
|
||||||
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
|
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
|
||||||
@ -213,7 +212,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
|||||||
webpage, 'smil url', group='url')
|
webpage, 'smil url', group='url')
|
||||||
path = self._search_regex(
|
path = self._search_regex(
|
||||||
r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
|
r'link\.theplatform\.com/s/((?:[^/?#&]+/)+[^/?#&]+)', smil_url, 'path')
|
||||||
smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4&format=SMIL'
|
smil_url += '?' if '?' not in smil_url else '&' + 'formats=m3u,mpeg4'
|
||||||
elif mobj.group('config'):
|
elif mobj.group('config'):
|
||||||
config_url = url + '&form=json'
|
config_url = url + '&form=json'
|
||||||
config_url = config_url.replace('swf/', 'config/')
|
config_url = config_url.replace('swf/', 'config/')
|
||||||
@ -223,9 +222,9 @@ class ThePlatformIE(ThePlatformBaseIE):
|
|||||||
release_url = config['releaseUrl']
|
release_url = config['releaseUrl']
|
||||||
else:
|
else:
|
||||||
release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||||
smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
smil_url = release_url + '&formats=MPEG4&manifest=f4m'
|
||||||
else:
|
else:
|
||||||
smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
|
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||||
|
|
||||||
sig = smuggled_data.get('sig')
|
sig = smuggled_data.get('sig')
|
||||||
if sig:
|
if sig:
|
||||||
@ -250,7 +249,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
||||||
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
||||||
'md5': '22d2b84f058d3586efcd99e57d59d314',
|
'md5': '6e32495b5073ab414471b615c5ded394',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'n_hardball_5biden_140207',
|
'id': 'n_hardball_5biden_140207',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -280,7 +279,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
first_video_id = None
|
first_video_id = None
|
||||||
duration = None
|
duration = None
|
||||||
for item in entry['media$content']:
|
for item in entry['media$content']:
|
||||||
smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
|
smil_url = item['plfile$url'] + '&mbr=true'
|
||||||
cur_video_id = ThePlatformIE._match_id(smil_url)
|
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||||
if first_video_id is None:
|
if first_video_id is None:
|
||||||
first_video_id = cur_video_id
|
first_video_id = cur_video_id
|
||||||
|
33
youtube_dl/extractor/tv3.py
Normal file
33
youtube_dl/extractor/tv3.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class TV3IE(InfoExtractor):
    # Resolves tv3.co.nz article pages to the Brightcove video they embed.
    _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
    _TEST = {
        'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
        'info_dict': {
            'id': '4659127992001',
            'ext': 'mp4',
            'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
            'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
            'uploader_id': '3812193411001',
            'upload_date': '20151213',
            'timestamp': 1449975272,
        },
        'expected_warnings': [
            'Failed to download MPD manifest'
        ],
        'params': {
            'skip_download': True,
        },
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Find the Brightcove video id on the page and delegate to BrightcoveNew."""
        page_slug = self._match_id(url)
        html = self._download_webpage(url, page_slug)
        # The numeric id sits in the legacy player's @videoPlayer <param>.
        player_param_re = r'<param\s*name="@videoPlayer"\s*value="(\d+)"'
        bc_id = self._search_regex(player_param_re, html, 'brightcove id')
        target = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
        return self.url_result(target, 'BrightcoveNew', bc_id)
|
@ -144,7 +144,8 @@ class UdemyIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, lecture_id)
|
webpage = self._download_webpage(url, lecture_id)
|
||||||
|
|
||||||
course_id = self._search_regex(
|
course_id = self._search_regex(
|
||||||
r'data-course-id=["\'](\d+)', webpage, 'course id')
|
(r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'),
|
||||||
|
webpage, 'course id')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
lecture = self._download_lecture(course_id, lecture_id)
|
lecture = self._download_lecture(course_id, lecture_id)
|
||||||
|
@ -4,6 +4,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -18,6 +19,9 @@ class WistiaIE(InfoExtractor):
|
|||||||
'id': 'sh7fpupwlt',
|
'id': 'sh7fpupwlt',
|
||||||
'ext': 'mov',
|
'ext': 'mov',
|
||||||
'title': 'Being Resourceful',
|
'title': 'Being Resourceful',
|
||||||
|
'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
|
||||||
|
'upload_date': '20131204',
|
||||||
|
'timestamp': 1386185018,
|
||||||
'duration': 117,
|
'duration': 117,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -32,26 +36,32 @@ class WistiaIE(InfoExtractor):
|
|||||||
raise ExtractorError('Error while getting the playlist',
|
raise ExtractorError('Error while getting the playlist',
|
||||||
expected=True)
|
expected=True)
|
||||||
data = data_json['media']
|
data = data_json['media']
|
||||||
|
title = data['name']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for a in data['assets']:
|
for a in data['assets']:
|
||||||
|
astatus = a.get('status')
|
||||||
atype = a.get('type')
|
atype = a.get('type')
|
||||||
if atype == 'still':
|
if (astatus is not None and astatus != 2) or atype == 'preview':
|
||||||
|
continue
|
||||||
|
elif atype in ('still', 'still_image'):
|
||||||
thumbnails.append({
|
thumbnails.append({
|
||||||
'url': a['url'],
|
'url': a['url'],
|
||||||
'resolution': '%dx%d' % (a['width'], a['height']),
|
'resolution': '%dx%d' % (a['width'], a['height']),
|
||||||
})
|
})
|
||||||
continue
|
else:
|
||||||
if atype == 'preview':
|
|
||||||
continue
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': atype,
|
'format_id': atype,
|
||||||
'url': a['url'],
|
'url': a['url'],
|
||||||
'width': a['width'],
|
'tbr': int_or_none(a.get('bitrate')),
|
||||||
'height': a['height'],
|
'vbr': int_or_none(a.get('opt_vbitrate')),
|
||||||
'filesize': a['size'],
|
'width': int_or_none(a.get('width')),
|
||||||
'ext': a['ext'],
|
'height': int_or_none(a.get('height')),
|
||||||
|
'filesize': int_or_none(a.get('size')),
|
||||||
|
'vcodec': a.get('codec'),
|
||||||
|
'container': a.get('container'),
|
||||||
|
'ext': a.get('ext'),
|
||||||
'preference': 1 if atype == 'original' else None,
|
'preference': 1 if atype == 'original' else None,
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -59,8 +69,10 @@ class WistiaIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': data['name'],
|
'title': title,
|
||||||
|
'description': data.get('seoDescription'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': data.get('duration'),
|
'duration': int_or_none(data.get('duration')),
|
||||||
|
'timestamp': int_or_none(data.get('createdAt')),
|
||||||
}
|
}
|
||||||
|
@ -309,6 +309,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
||||||
|
|
||||||
# Apple HTTP Live Streaming
|
# Apple HTTP Live Streaming
|
||||||
|
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
||||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
||||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
||||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
||||||
|
@ -35,6 +35,7 @@ import xml.etree.ElementTree
|
|||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
|
compat_HTMLParser,
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
@ -49,6 +50,7 @@ from .compat import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_xpath,
|
||||||
shlex_quote,
|
shlex_quote,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -164,12 +166,7 @@ if sys.version_info >= (2, 7):
|
|||||||
return node.find(expr)
|
return node.find(expr)
|
||||||
else:
|
else:
|
||||||
def find_xpath_attr(node, xpath, key, val=None):
|
def find_xpath_attr(node, xpath, key, val=None):
|
||||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
for f in node.findall(compat_xpath(xpath)):
|
||||||
# .//node does not match if a node is a direct child of . !
|
|
||||||
if isinstance(xpath, compat_str):
|
|
||||||
xpath = xpath.encode('ascii')
|
|
||||||
|
|
||||||
for f in node.findall(xpath):
|
|
||||||
if key not in f.attrib:
|
if key not in f.attrib:
|
||||||
continue
|
continue
|
||||||
if val is None or f.attrib.get(key) == val:
|
if val is None or f.attrib.get(key) == val:
|
||||||
@ -194,9 +191,7 @@ def xpath_with_ns(path, ns_map):
|
|||||||
|
|
||||||
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||||
def _find_xpath(xpath):
|
def _find_xpath(xpath):
|
||||||
if sys.version_info < (2, 7): # Crazy 2.6
|
return node.find(compat_xpath(xpath))
|
||||||
xpath = xpath.encode('ascii')
|
|
||||||
return node.find(xpath)
|
|
||||||
|
|
||||||
if isinstance(xpath, (str, compat_str)):
|
if isinstance(xpath, (str, compat_str)):
|
||||||
n = _find_xpath(xpath)
|
n = _find_xpath(xpath)
|
||||||
@ -273,6 +268,38 @@ def get_element_by_attribute(attribute, value, html):
|
|||||||
return unescapeHTML(res)
|
return unescapeHTML(res)
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLAttributeParser(compat_HTMLParser):
|
||||||
|
"""Trivial HTML parser to gather the attributes for a single element"""
|
||||||
|
def __init__(self):
|
||||||
|
self.attrs = {}
|
||||||
|
compat_HTMLParser.__init__(self)
|
||||||
|
|
||||||
|
def handle_starttag(self, tag, attrs):
|
||||||
|
self.attrs = dict(attrs)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_attributes(html_element):
|
||||||
|
"""Given a string for an HTML element such as
|
||||||
|
<el
|
||||||
|
a="foo" B="bar" c="&#98;az" d=boz
|
||||||
|
empty= noval entity="&amp;"
|
||||||
|
sq='"' dq="'"
|
||||||
|
>
|
||||||
|
Decode and return a dictionary of attributes.
|
||||||
|
{
|
||||||
|
'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
|
||||||
|
'empty': '', 'noval': None, 'entity': '&',
|
||||||
|
'sq': '"', 'dq': '\''
|
||||||
|
}.
|
||||||
|
NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
|
||||||
|
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
|
||||||
|
"""
|
||||||
|
parser = HTMLAttributeParser()
|
||||||
|
parser.feed(html_element)
|
||||||
|
parser.close()
|
||||||
|
return parser.attrs
|
||||||
|
|
||||||
|
|
||||||
def clean_html(html):
|
def clean_html(html):
|
||||||
"""Clean an HTML snippet into a readable string"""
|
"""Clean an HTML snippet into a readable string"""
|
||||||
|
|
||||||
@ -1319,7 +1346,7 @@ def format_bytes(bytes):
|
|||||||
def lookup_unit_table(unit_table, s):
|
def lookup_unit_table(unit_table, s):
|
||||||
units_re = '|'.join(re.escape(u) for u in unit_table)
|
units_re = '|'.join(re.escape(u) for u in unit_table)
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
|
r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
|
||||||
if not m:
|
if not m:
|
||||||
return None
|
return None
|
||||||
num_str = m.group('num').replace(',', '.')
|
num_str = m.group('num').replace(',', '.')
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.03.14'
|
__version__ = '2016.03.18'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user