Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
7bd772a69f
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.26**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.14**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.05.26
|
[debug] youtube-dl version 2018.06.14
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -47,3 +47,4 @@ youtube-dl.zsh
|
|||||||
*.iml
|
*.iml
|
||||||
|
|
||||||
tmp/
|
tmp/
|
||||||
|
venv/
|
||||||
|
82
ChangeLog
82
ChangeLog
@ -1,3 +1,85 @@
|
|||||||
|
version 2018.06.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/http] Fix retry on error when streaming to stdout (#16699)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [discoverynetworks] Add support for disco-api videos (#16724)
|
||||||
|
+ [dailymotion] Add support for password protected videos (#9789)
|
||||||
|
+ [abc:iview] Add support for livestreams (#12354)
|
||||||
|
* [abc:iview] Fix extraction (#16704)
|
||||||
|
+ [crackle] Add support for sonycrackle.com (#16698)
|
||||||
|
+ [tvnet] Add support for tvnet.gov.vn (#15462)
|
||||||
|
* [nrk] Update API hosts and try all previously known ones (#16690)
|
||||||
|
* [wimp] Fix Youtube embeds extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.11
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
|
||||||
|
+ [inc] Add support for another embed schema (#16666)
|
||||||
|
* [tv4] Fix format extraction (#16650)
|
||||||
|
+ [nexx] Add support for free cdn (#16538)
|
||||||
|
+ [pbs] Add another cove id pattern (#15373)
|
||||||
|
+ [rbmaradio] Add support for 192k format (#16631)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.04
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [camtube] Add support for camtube.co
|
||||||
|
+ [twitter:card] Extract guest token (#16609)
|
||||||
|
+ [chaturbate] Use geo verification headers
|
||||||
|
+ [bbc] Add support for bbcthree (#16612)
|
||||||
|
* [youtube] Move metadata extraction after video availability check
|
||||||
|
+ [youtube] Extract track and artist
|
||||||
|
+ [safari] Add support for new URL schema (#16614)
|
||||||
|
* [adn] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.06.02
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve determine_ext
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [facebook] Add support for tahoe player videos (#15441, #16554)
|
||||||
|
* [cbc] Improve extraction (#16583, #16593)
|
||||||
|
* [openload] Improve ext extraction (#16595)
|
||||||
|
+ [twitter:card] Add support for another endpoint (#16586)
|
||||||
|
+ [openload] Add support for oload.win and oload.download (#16592)
|
||||||
|
* [audimedia] Fix extraction (#15309)
|
||||||
|
+ [francetv] Add support for sport.francetvinfo.fr (#15645)
|
||||||
|
* [mlb] Improve extraction (#16587)
|
||||||
|
- [nhl] Remove old extractors
|
||||||
|
* [rbmaradio] Check formats availability (#16585)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.05.30
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/rtmp] Generalize download messages and report time elapsed
|
||||||
|
on finish
|
||||||
|
* [downloader/rtmp] Gracefully handle live streams interrupted by user
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [teamcoco] Fix extraction for full episodes (#16573)
|
||||||
|
* [spiegel] Fix info extraction (#16538)
|
||||||
|
+ [apa] Add support for apa.at (#15041, #15672)
|
||||||
|
+ [bellmedia] Add support for bnnbloomberg.ca (#16560)
|
||||||
|
+ [9c9media] Extract MPD formats and subtitles
|
||||||
|
* [cammodels] Use geo verification headers
|
||||||
|
+ [ufctv] Add support for authentication (#16542)
|
||||||
|
+ [cammodels] Add support for cammodels.com (#14499)
|
||||||
|
* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
|
||||||
|
(#16551)
|
||||||
|
* [soundcloud] Detect format extension (#16549)
|
||||||
|
* [cbc] Fix playlist title extraction (#16502)
|
||||||
|
+ [tumblr] Detect and report sensitive media (#13829)
|
||||||
|
+ [tumblr] Add support for authentication (#15133)
|
||||||
|
|
||||||
|
|
||||||
version 2018.05.26
|
version 2018.05.26
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
|
|||||||
for fn in glob.glob('*.html*'):
|
for fn in glob.glob('*.html*'):
|
||||||
with io.open(fn, encoding='utf-8') as f:
|
with io.open(fn, encoding='utf-8') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
|
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
||||||
if content != newc:
|
if content != newc:
|
||||||
tmpFn = fn + '.part'
|
tmpFn = fn + '.part'
|
||||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
- **8tracks**
|
- **8tracks**
|
||||||
- **91porn**
|
- **91porn**
|
||||||
- **9c9media**
|
- **9c9media**
|
||||||
- **9c9media:stack**
|
|
||||||
- **9gag**
|
- **9gag**
|
||||||
- **9now.com.au**
|
- **9now.com.au**
|
||||||
- **abc.net.au**
|
- **abc.net.au**
|
||||||
@ -48,6 +47,7 @@
|
|||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
- **Anvato**
|
- **Anvato**
|
||||||
- **AnySex**
|
- **AnySex**
|
||||||
|
- **APA**
|
||||||
- **Aparat**
|
- **Aparat**
|
||||||
- **AppleConnect**
|
- **AppleConnect**
|
||||||
- **AppleDaily**: 臺灣蘋果日報
|
- **AppleDaily**: 臺灣蘋果日報
|
||||||
@ -128,6 +128,8 @@
|
|||||||
- **BYUtv**
|
- **BYUtv**
|
||||||
- **Camdemy**
|
- **Camdemy**
|
||||||
- **CamdemyFolder**
|
- **CamdemyFolder**
|
||||||
|
- **CamModels**
|
||||||
|
- **CamTube**
|
||||||
- **CamWithHer**
|
- **CamWithHer**
|
||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||||
@ -552,9 +554,6 @@
|
|||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **NhkVod**
|
- **NhkVod**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
- **nhl.com:news**: NHL news
|
|
||||||
- **nhl.com:videocenter**
|
|
||||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
|
||||||
- **nick.com**
|
- **nick.com**
|
||||||
- **nick.de**
|
- **nick.de**
|
||||||
- **nickelodeon:br**
|
- **nickelodeon:br**
|
||||||
@ -792,6 +791,7 @@
|
|||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
- **Spiegel:Article**: Articles on spiegel.de
|
||||||
- **Spiegeltv**
|
- **Spiegeltv**
|
||||||
|
- **sport.francetvinfo.fr**
|
||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBoxEmbed**
|
- **SportBoxEmbed**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
@ -893,6 +893,7 @@
|
|||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
- **TVN24**
|
- **TVN24**
|
||||||
|
- **TVNet**
|
||||||
- **TVNoe**
|
- **TVNoe**
|
||||||
- **TVNow**
|
- **TVNow**
|
||||||
- **TVNowList**
|
- **TVNowList**
|
||||||
|
@ -2,5 +2,5 @@
|
|||||||
universal = True
|
universal = True
|
||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
|
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
|
||||||
ignore = E402,E501,E731,E741
|
ignore = E402,E501,E731,E741
|
||||||
|
@ -361,6 +361,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
||||||
|
self.assertEqual(determine_ext('foobar', None), None)
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
|
@ -217,10 +217,11 @@ class HttpFD(FileDownloader):
|
|||||||
before = start # start measuring
|
before = start # start measuring
|
||||||
|
|
||||||
def retry(e):
|
def retry(e):
|
||||||
if ctx.tmpfilename != '-':
|
to_stdout = ctx.tmpfilename == '-'
|
||||||
|
if not to_stdout:
|
||||||
ctx.stream.close()
|
ctx.stream.close()
|
||||||
ctx.stream = None
|
ctx.stream = None
|
||||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
raise RetryDownload(e)
|
raise RetryDownload(e)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -105,22 +105,22 @@ class ABCIE(InfoExtractor):
|
|||||||
|
|
||||||
class ABCIViewIE(InfoExtractor):
|
class ABCIViewIE(InfoExtractor):
|
||||||
IE_NAME = 'abc.net.au:iview'
|
IE_NAME = 'abc.net.au:iview'
|
||||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||||
_GEO_COUNTRIES = ['AU']
|
_GEO_COUNTRIES = ['AU']
|
||||||
|
|
||||||
# ABC iview programs are normally available for 14 days only.
|
# ABC iview programs are normally available for 14 days only.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
|
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
||||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ZY9247A021S00',
|
'id': 'ZX9371A050S00',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Gaston's Visit",
|
'title': "Gaston's Birthday",
|
||||||
'series': "Ben And Holly's Little Kingdom",
|
'series': "Ben And Holly's Little Kingdom",
|
||||||
'description': 'md5:18db170ad71cf161e006a4c688e33155',
|
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
||||||
'upload_date': '20180318',
|
'upload_date': '20180604',
|
||||||
'uploader_id': 'abc4kids',
|
'uploader_id': 'abc4kids',
|
||||||
'timestamp': 1521400959,
|
'timestamp': 1528140219,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -129,17 +129,16 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_params = self._download_json(
|
||||||
video_params = self._parse_json(self._search_regex(
|
'https://iview.abc.net.au/api/programs/' + video_id, video_id)
|
||||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
|
||||||
title = video_params.get('title') or video_params['seriesTitle']
|
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
|
||||||
|
|
||||||
house_number = video_params.get('episodeHouseNumber')
|
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
|
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||||
int(time.time()), house_number)
|
int(time.time()), house_number)
|
||||||
sig = hmac.new(
|
sig = hmac.new(
|
||||||
'android.content.res.Resources'.encode('utf-8'),
|
b'android.content.res.Resources',
|
||||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||||
token = self._download_webpage(
|
token = self._download_webpage(
|
||||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||||
@ -169,18 +168,26 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
'ext': 'vtt',
|
'ext': 'vtt',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
is_live = video_params.get('livestream') == '1'
|
||||||
|
if is_live:
|
||||||
|
title = self._live_title(title)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': unescapeHTML(title),
|
'title': title,
|
||||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
'description': video_params.get('description'),
|
||||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
'thumbnail': video_params.get('thumbnail'),
|
||||||
'duration': int_or_none(video_params.get('eventDuration')),
|
'duration': int_or_none(video_params.get('eventDuration')),
|
||||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
'season_number': int_or_none(self._search_regex(
|
||||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
|
||||||
|
'episode_number': int_or_none(self._search_regex(
|
||||||
|
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||||
|
'episode_id': house_number,
|
||||||
'uploader_id': video_params.get('channel'),
|
'uploader_id': video_params.get('channel'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import binascii
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import random
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..aes import aes_cbc_decrypt
|
from ..aes import aes_cbc_decrypt
|
||||||
@ -12,9 +15,12 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
bytes_to_intlist,
|
bytes_to_intlist,
|
||||||
|
bytes_to_long,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
|
long_to_bytes,
|
||||||
|
pkcs1pad,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
@ -35,6 +41,7 @@ class ADNIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
|
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_path, video_id):
|
||||||
if not sub_path:
|
if not sub_path:
|
||||||
@ -42,16 +49,14 @@ class ADNIE(InfoExtractor):
|
|||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
urljoin(self._BASE_URL, sub_path),
|
urljoin(self._BASE_URL, sub_path),
|
||||||
video_id, fatal=False, headers={
|
video_id, fatal=False)
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
|
||||||
})
|
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
|
bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
@ -112,11 +117,24 @@ class ADNIE(InfoExtractor):
|
|||||||
error = None
|
error = None
|
||||||
if not links:
|
if not links:
|
||||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||||
links_data = self._download_json(urljoin(
|
token = options['token']
|
||||||
self._BASE_URL, links_url), video_id)
|
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||||
|
message = bytes_to_intlist(json.dumps({
|
||||||
|
'k': self._K,
|
||||||
|
'e': 60,
|
||||||
|
't': token,
|
||||||
|
}))
|
||||||
|
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||||
|
n, e = self._RSA_KEY
|
||||||
|
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||||
|
authorization = base64.b64encode(encrypted_message).decode()
|
||||||
|
links_data = self._download_json(
|
||||||
|
urljoin(self._BASE_URL, links_url), video_id, headers={
|
||||||
|
'Authorization': 'Bearer ' + authorization,
|
||||||
|
})
|
||||||
links = links_data.get('links') or {}
|
links = links_data.get('links') or {}
|
||||||
metas = metas or links_data.get('meta') or {}
|
metas = metas or links_data.get('meta') or {}
|
||||||
sub_path = sub_path or links_data.get('subtitles')
|
sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
|
||||||
error = links_data.get('error')
|
error = links_data.get('error')
|
||||||
title = metas.get('title') or video_info['title']
|
title = metas.get('title') or video_info['title']
|
||||||
|
|
||||||
|
@ -5,13 +5,12 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AudiMediaIE(InfoExtractor):
|
class AudiMediaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
||||||
'md5': '79a8b71c46d49042609795ab59779b66',
|
'md5': '79a8b71c46d49042609795ab59779b66',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -24,41 +23,46 @@ class AudiMediaIE(InfoExtractor):
|
|||||||
'duration': 74022,
|
'duration': 74022,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
# extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
|
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||||
_AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
raw_payload = self._search_regex([
|
raw_payload = self._search_regex([
|
||||||
r'class="amtv-embed"[^>]+id="([^"]+)"',
|
r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
|
||||||
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
|
r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
|
||||||
|
r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
|
||||||
|
r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
|
||||||
|
r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
|
||||||
], webpage, 'raw payload')
|
], webpage, 'raw payload')
|
||||||
_, stage_mode, video_id, lang = raw_payload.split('-')
|
_, stage_mode, video_id, _ = raw_payload.split('-')
|
||||||
|
|
||||||
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
||||||
if stage_mode not in ('s', 'e'):
|
if stage_mode not in ('s', 'e'):
|
||||||
request = sanitized_Request(
|
video_data = self._download_json(
|
||||||
'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
|
'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
|
||||||
headers={'X-Auth-Token': self._AUTH_TOKEN})
|
video_id, query={
|
||||||
json_data = self._download_json(request, video_id)['results']
|
'embed[]': ['video_versions', 'thumbnail_image'],
|
||||||
|
})['results']
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
stream_url_hls = json_data.get('stream_url_hls')
|
stream_url_hls = video_data.get('stream_url_hls')
|
||||||
if stream_url_hls:
|
if stream_url_hls:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url_hls, video_id, 'mp4',
|
stream_url_hls, video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
stream_url_hds = json_data.get('stream_url_hds')
|
stream_url_hds = video_data.get('stream_url_hds')
|
||||||
if stream_url_hds:
|
if stream_url_hds:
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
stream_url_hds + '?hdcore=3.4.0',
|
stream_url_hds + '?hdcore=3.4.0',
|
||||||
video_id, f4m_id='hds', fatal=False))
|
video_id, f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
for video_version in json_data.get('video_versions'):
|
for video_version in video_data.get('video_versions', []):
|
||||||
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
||||||
if not video_version_url:
|
if not video_version_url:
|
||||||
continue
|
continue
|
||||||
@ -79,11 +83,11 @@ class AudiMediaIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': json_data['title'],
|
'title': video_data['title'],
|
||||||
'description': json_data.get('subtitle'),
|
'description': video_data.get('subtitle'),
|
||||||
'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
|
'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
|
||||||
'timestamp': parse_iso8601(json_data.get('publication_date')),
|
'timestamp': parse_iso8601(video_data.get('publication_date')),
|
||||||
'duration': int_or_none(json_data.get('duration')),
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'view_count': int_or_none(json_data.get('view_count')),
|
'view_count': int_or_none(video_data.get('view_count')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
@ -772,6 +773,17 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p06556y7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||||
|
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -994,6 +1006,36 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bbc3_config = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
|
'bbcthree config', default='{}'),
|
||||||
|
playlist_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if bbc3_config:
|
||||||
|
bbc3_playlist = try_get(
|
||||||
|
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
||||||
|
dict)
|
||||||
|
if bbc3_playlist:
|
||||||
|
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||||
|
thumbnail = bbc3_playlist.get('holdingImageURL')
|
||||||
|
entries = []
|
||||||
|
for bbc3_item in bbc3_playlist['items']:
|
||||||
|
programme_id = bbc3_item.get('versionID')
|
||||||
|
if not programme_id:
|
||||||
|
continue
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
entries.append({
|
||||||
|
'id': programme_id,
|
||||||
|
'title': playlist_title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
|
69
youtube_dl/extractor/camtube.py
Normal file
69
youtube_dl/extractor/camtube.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CamTubeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
|
||||||
|
'display_id': 'minafay-030618-1136-chaturbate-female',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'minafay-030618-1136-chaturbate-female',
|
||||||
|
'duration': 1274,
|
||||||
|
'timestamp': 1528018608,
|
||||||
|
'upload_date': '20180603',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
_API_BASE = 'https://api.camtube.co'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
token = self._download_json(
|
||||||
|
'%s/rpc/session/new' % self._API_BASE, display_id,
|
||||||
|
'Downloading session token')['token']
|
||||||
|
|
||||||
|
self._set_cookie('api.camtube.co', 'session', token)
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
'%s/recordings/%s' % (self._API_BASE, display_id), display_id,
|
||||||
|
headers={'Referer': url})
|
||||||
|
|
||||||
|
video_id = video['uuid']
|
||||||
|
timestamp = unified_timestamp(video.get('createdAt'))
|
||||||
|
duration = int_or_none(video.get('duration'))
|
||||||
|
view_count = int_or_none(video.get('viewCount'))
|
||||||
|
like_count = int_or_none(video.get('likeCount'))
|
||||||
|
creator = video.get('stageName')
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': '%s/recordings/%s/manifest.m3u8'
|
||||||
|
% (self._API_BASE, video_id),
|
||||||
|
'format_id': 'hls',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'protocol': 'm3u8_native',
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': display_id,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
|
'creator': creator,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
@ -136,9 +137,15 @@ class CBCIE(InfoExtractor):
|
|||||||
entries = [
|
entries = [
|
||||||
self._extract_player_init(player_init, display_id)
|
self._extract_player_init(player_init, display_id)
|
||||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||||
|
media_ids = []
|
||||||
|
for media_id_re in (
|
||||||
|
r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
|
||||||
|
r'<div[^>]+\bid=["\']player-(\d+)',
|
||||||
|
r'guid["\']\s*:\s*["\'](\d+)'):
|
||||||
|
media_ids.extend(re.findall(media_id_re, webpage))
|
||||||
entries.extend([
|
entries.extend([
|
||||||
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
|
for media_id in orderedSet(media_ids)])
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, display_id, strip_or_none(title),
|
entries, display_id, strip_or_none(title),
|
||||||
self._og_search_description(webpage))
|
self._og_search_description(webpage))
|
||||||
|
@ -31,7 +31,8 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, headers=self.geo_verification_headers())
|
||||||
|
|
||||||
m3u8_urls = []
|
m3u8_urls = []
|
||||||
|
|
||||||
|
@ -19,8 +19,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CrackleIE(InfoExtractor):
|
class CrackleIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
# geo restricted to CA
|
# geo restricted to CA
|
||||||
'url': 'https://www.crackle.com/andromeda/2502343',
|
'url': 'https://www.crackle.com/andromeda/2502343',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -45,7 +45,10 @@ class CrackleIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.sonycrackle.com/andromeda/2502343',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -1,12 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import base64
|
||||||
import json
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_struct_pack
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
@ -64,7 +68,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
'uploader': 'Deadline',
|
'uploader': 'Deadline',
|
||||||
'uploader_id': 'x1xm8ri',
|
'uploader_id': 'x1xm8ri',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||||
@ -167,6 +170,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
player = self._parse_json(player_v5, video_id)
|
player = self._parse_json(player_v5, video_id)
|
||||||
metadata = player['metadata']
|
metadata = player['metadata']
|
||||||
|
|
||||||
|
if metadata.get('error', {}).get('type') == 'password_protected':
|
||||||
|
password = self._downloader.params.get('videopassword')
|
||||||
|
if password:
|
||||||
|
r = int(metadata['id'][1:], 36)
|
||||||
|
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
|
||||||
|
t = ''.join(random.choice(string.ascii_letters) for i in range(10))
|
||||||
|
n = us64e(compat_struct_pack('I', r))
|
||||||
|
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
|
||||||
|
metadata = self._download_json(
|
||||||
|
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
|
||||||
|
|
||||||
self._check_error(metadata)
|
self._check_error(metadata)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@ -302,8 +316,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
|||||||
|
|
||||||
def _check_error(self, info):
|
def _check_error(self, info):
|
||||||
error = info.get('error')
|
error = info.get('error')
|
||||||
if info.get('error') is not None:
|
if error:
|
||||||
title = error['title']
|
title = error.get('title') or error['message']
|
||||||
# See https://developer.dailymotion.com/api#access-error
|
# See https://developer.dailymotion.com/api#access-error
|
||||||
if error.get('code') == 'DM007':
|
if error.get('code') == 'DM007':
|
||||||
self.raise_geo_restricted(msg=title)
|
self.raise_geo_restricted(msg=title)
|
||||||
|
@ -3,8 +3,8 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .brightcove import BrightcoveLegacyIE
|
from .brightcove import BrightcoveLegacyIE
|
||||||
|
from .dplay import DPlayIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@ -12,8 +12,13 @@ from ..compat import (
|
|||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(InfoExtractor):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
|
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
|
||||||
|
(?:
|
||||||
|
.*\#(?P<id>\d+)|
|
||||||
|
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
|
||||||
|
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
|
||||||
|
)'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||||
@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
alternate_id = mobj.group('alternate_id')
|
||||||
|
if alternate_id:
|
||||||
|
self._initialize_geo_bypass({
|
||||||
|
'countries': ['DE'],
|
||||||
|
})
|
||||||
|
return self._get_disco_api_info(
|
||||||
|
url, '%s/%s' % (mobj.group('programme'), alternate_id),
|
||||||
|
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
|
||||||
brightcove_id = mobj.group('id')
|
brightcove_id = mobj.group('id')
|
||||||
if not brightcove_id:
|
if not brightcove_id:
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
|
@ -97,6 +97,75 @@ class DPlayIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _get_disco_api_info(self, url, display_id, disco_host, realm):
    """Extract a video through a disco-api host and return its info dict.

    Arguments:
        url: page URL; sent as the ``Referer`` header on API requests.
        display_id: identifier used in the ``content/videos`` path and
            passed as the video id to the download helpers.
        disco_host: API host name; ``https://`` is prepended to it.
        realm: value of the ``realm`` query parameter of the token request.

    Returns a dict with ``id``, ``display_id``, ``title``, ``description``,
    ``duration``, ``timestamp``, ``series``, ``season_number``,
    ``episode_number``, ``age_limit`` and ``formats``.

    Raises KeyError if the token, video or playback responses lack the
    keys that are accessed with subscripts below.
    """
    disco_base = 'https://' + disco_host

    token = self._download_json(
        '%s/token' % disco_base, display_id, 'Downloading token',
        query={
            'realm': realm,
        })['data']['attributes']['token']

    # The same bearer token and referer are sent with both the metadata
    # and the playback requests.
    headers = {
        'Referer': url,
        'Authorization': 'Bearer ' + token,
    }

    video = self._download_json(
        '%s/content/videos/%s' % (disco_base, display_id), display_id,
        headers=headers, query={
            'include': 'show'
        })
    video_id = video['data']['id']
    info = video['data']['attributes']
    title = info['name']

    streaming = self._download_json(
        '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
        display_id, headers=headers)['data']['attributes']['streaming']

    formats = []
    for format_id, format_dict in streaming.items():
        # Skip entries that are not dicts or carry no URL.
        if not isinstance(format_dict, dict):
            continue
        format_url = format_dict.get('url')
        if not format_url:
            continue
        ext = determine_ext(format_url)
        if format_id == 'dash' or ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                format_url, display_id, mpd_id='dash', fatal=False))
        elif format_id == 'hls' or ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                format_url, display_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls',
                fatal=False))
        else:
            # Anything else is used as a direct URL.
            formats.append({
                'url': format_url,
                'format_id': format_id,
            })
    self._sort_formats(formats)

    # The series name comes from the first included resource of type
    # 'show'; it stays None when there is no such resource.
    series = None
    included = video.get('included')
    if isinstance(included, list):
        show = next(
            (e for e in included if e.get('type') == 'show'), None)
        if show is not None:
            series = try_get(
                show, lambda x: x['attributes']['name'], compat_str)

    return {
        'id': video_id,
        'display_id': display_id,
        'title': title,
        'description': info.get('description'),
        # scale=1000: videoDuration is presumably in milliseconds.
        'duration': float_or_none(
            info.get('videoDuration'), scale=1000),
        'timestamp': unified_timestamp(info.get('publishStart')),
        'series': series,
        'season_number': int_or_none(info.get('seasonNumber')),
        'episode_number': int_or_none(info.get('episodeNumber')),
        'age_limit': int_or_none(info.get('minimum_age')),
        'formats': formats,
    }
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = mobj.group('id')
|
display_id = mobj.group('id')
|
||||||
@ -113,72 +182,8 @@ class DPlayIE(InfoExtractor):
|
|||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
host = mobj.group('host')
|
host = mobj.group('host')
|
||||||
disco_base = 'https://disco-api.%s' % host
|
return self._get_disco_api_info(
|
||||||
self._download_json(
|
url, display_id, 'disco-api.' + host, host.replace('.', ''))
|
||||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
|
||||||
query={
|
|
||||||
'realm': host.replace('.', ''),
|
|
||||||
})
|
|
||||||
video = self._download_json(
|
|
||||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
|
||||||
headers={
|
|
||||||
'Referer': url,
|
|
||||||
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
|
|
||||||
}, query={
|
|
||||||
'include': 'show'
|
|
||||||
})
|
|
||||||
video_id = video['data']['id']
|
|
||||||
info = video['data']['attributes']
|
|
||||||
title = info['name']
|
|
||||||
formats = []
|
|
||||||
for format_id, format_dict in self._download_json(
|
|
||||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
|
||||||
display_id)['data']['attributes']['streaming'].items():
|
|
||||||
if not isinstance(format_dict, dict):
|
|
||||||
continue
|
|
||||||
format_url = format_dict.get('url')
|
|
||||||
if not format_url:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(format_url)
|
|
||||||
if format_id == 'dash' or ext == 'mpd':
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
format_url, display_id, mpd_id='dash', fatal=False))
|
|
||||||
elif format_id == 'hls' or ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, display_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
|
||||||
fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': format_id,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
series = None
|
|
||||||
try:
|
|
||||||
included = video.get('included')
|
|
||||||
if isinstance(included, list):
|
|
||||||
show = next(e for e in included if e.get('type') == 'show')
|
|
||||||
series = try_get(
|
|
||||||
show, lambda x: x['attributes']['name'], compat_str)
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': info.get('description'),
|
|
||||||
'duration': float_or_none(
|
|
||||||
info.get('videoDuration'), scale=1000),
|
|
||||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
|
||||||
'series': series,
|
|
||||||
'season_number': int_or_none(info.get('seasonNumber')),
|
|
||||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
|
||||||
'age_limit': int_or_none(info.get('minimum_age')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||||
|
@ -147,6 +147,7 @@ from .camdemy import (
|
|||||||
CamdemyFolderIE
|
CamdemyFolderIE
|
||||||
)
|
)
|
||||||
from .cammodels import CamModelsIE
|
from .cammodels import CamModelsIE
|
||||||
|
from .camtube import CamTubeIE
|
||||||
from .camwithher import CamWithHerIE
|
from .camwithher import CamWithHerIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
@ -381,6 +382,7 @@ from .francetv import (
|
|||||||
FranceTVSiteIE,
|
FranceTVSiteIE,
|
||||||
FranceTVEmbedIE,
|
FranceTVEmbedIE,
|
||||||
FranceTVInfoIE,
|
FranceTVInfoIE,
|
||||||
|
FranceTVInfoSportIE,
|
||||||
FranceTVJeunesseIE,
|
FranceTVJeunesseIE,
|
||||||
GenerationWhatIE,
|
GenerationWhatIE,
|
||||||
CultureboxIE,
|
CultureboxIE,
|
||||||
@ -705,12 +707,7 @@ from .nexx import (
|
|||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
from .nhk import NhkVodIE
|
from .nhk import NhkVodIE
|
||||||
from .nhl import (
|
from .nhl import NHLIE
|
||||||
NHLVideocenterIE,
|
|
||||||
NHLNewsIE,
|
|
||||||
NHLVideocenterCategoryIE,
|
|
||||||
NHLIE,
|
|
||||||
)
|
|
||||||
from .nick import (
|
from .nick import (
|
||||||
NickIE,
|
NickIE,
|
||||||
NickBrIE,
|
NickBrIE,
|
||||||
@ -1142,6 +1139,7 @@ from .tvc import (
|
|||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvn24 import TVN24IE
|
from .tvn24 import TVN24IE
|
||||||
|
from .tvnet import TVNetIE
|
||||||
from .tvnoe import TVNoeIE
|
from .tvnoe import TVNoeIE
|
||||||
from .tvnow import (
|
from .tvnow import (
|
||||||
TVNowIE,
|
TVNowIE,
|
||||||
|
@ -56,6 +56,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||||
|
|
||||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||||
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
@ -208,6 +209,17 @@ class FacebookIE(InfoExtractor):
|
|||||||
# no title
|
# no title
|
||||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '359649331226507',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
|
||||||
|
'uploader': 'ESL One Dota 2',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -312,16 +324,18 @@ class FacebookIE(InfoExtractor):
|
|||||||
if server_js_data:
|
if server_js_data:
|
||||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||||
|
|
||||||
|
def extract_from_jsmods_instances(js_data):
|
||||||
|
if js_data:
|
||||||
|
return extract_video_data(try_get(
|
||||||
|
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
||||||
webpage, 'js data', default='{}'),
|
webpage, 'js data', default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
if server_js_data:
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
video_data = extract_video_data(try_get(
|
|
||||||
server_js_data, lambda x: x['jsmods']['instances'],
|
|
||||||
list) or [])
|
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
if not fatal_if_no_video:
|
||||||
@ -333,8 +347,33 @@ class FacebookIE(InfoExtractor):
|
|||||||
expected=True)
|
expected=True)
|
||||||
elif '>You must log in to continue' in webpage:
|
elif '>You must log in to continue' in webpage:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
else:
|
|
||||||
raise ExtractorError('Cannot parse data')
|
# Video info not in first request, do a secondary request using
|
||||||
|
# tahoe player specific URL
|
||||||
|
tahoe_data = self._download_webpage(
|
||||||
|
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'__user': 0,
|
||||||
|
'__a': 1,
|
||||||
|
'__pc': self._search_regex(
|
||||||
|
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||||
|
'pkg cohort', default='PHASED:DEFAULT'),
|
||||||
|
'__rev': self._search_regex(
|
||||||
|
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||||
|
'client revision', default='3944515'),
|
||||||
|
}),
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
})
|
||||||
|
tahoe_js_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
|
||||||
|
'tahoe js data', default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
video_data = extract_from_jsmods_instances(tahoe_js_data)
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
|
raise ExtractorError('Cannot parse data')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in video_data:
|
for f in video_data:
|
||||||
@ -380,7 +419,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_title = 'Facebook video #%s' % video_id
|
video_title = 'Facebook video #%s' % video_id
|
||||||
uploader = clean_html(get_element_by_id(
|
uploader = clean_html(get_element_by_id(
|
||||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
||||||
|
fatal=False) or self._og_search_title(webpage, fatal=False)
|
||||||
timestamp = int_or_none(self._search_regex(
|
timestamp = int_or_none(self._search_regex(
|
||||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||||
'timestamp', default=None))
|
'timestamp', default=None))
|
||||||
|
@ -379,6 +379,31 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
return self._make_url_result(video_id, catalogue)
|
return self._make_url_result(video_id, catalogue)
|
||||||
|
|
||||||
|
|
||||||
|
class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
    # Extractor for pages on sport.francetvinfo.fr. It only locates the
    # embedded video id in the page and hands it to _make_url_result,
    # which is not defined in this class (presumably inherited from
    # FranceTVBaseInfoExtractor).
    IE_NAME = 'sport.francetvinfo.fr'
    # The last path segment of the URL is captured as the display id.
    _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
        'info_dict': {
            'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
            'ext': 'mp4',
            'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
            'timestamp': 1523639962,
            'upload_date': '20180413',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [FranceTVIE.ie_key()],
    }]

    def _real_extract(self, url):
        """Return a URL result for the video embedded in the page at ``url``.

        The page is downloaded and the video id is read from its
        ``data-video`` attribute; the 'Sport-web' catalogue is passed
        along with it.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
        return self._make_url_result(video_id, 'Sport-web')
|
||||||
|
|
||||||
|
|
||||||
class GenerationWhatIE(InfoExtractor):
|
class GenerationWhatIE(InfoExtractor):
|
||||||
IE_NAME = 'france2.fr:generation-what'
|
IE_NAME = 'france2.fr:generation-what'
|
||||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||||
|
@ -21,6 +21,21 @@ class IncIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# div with id=kaltura_player_1_kqs38cgm
|
||||||
|
'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_kqs38cgm',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
|
||||||
|
'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
|
||||||
|
'timestamp': 1364403232,
|
||||||
|
'upload_date': '20130327',
|
||||||
|
'uploader_id': 'incdigital@inc.com',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
|
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -31,10 +46,13 @@ class IncIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
partner_id = self._search_regex(
|
||||||
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
|
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
|
||||||
|
'partner id', default='1034971')
|
||||||
|
|
||||||
kaltura_id = self._parse_json(self._search_regex(
|
kaltura_id = self._search_regex(
|
||||||
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
|
r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
|
||||||
|
default=None, group='id') or self._parse_json(self._search_regex(
|
||||||
|
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
|
||||||
display_id)['vid_kaltura_id']
|
display_id)['vid_kaltura_id']
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
|
@ -1,96 +1,90 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .nhl import NHLBaseIE
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
parse_duration,
|
|
||||||
parse_iso8601,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MLBIE(InfoExtractor):
|
class MLBIE(NHLBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:[\da-z_-]+\.)*mlb\.com/
|
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
|
(?:[^/]+/)*c-|
|
||||||
(?:
|
(?:
|
||||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||||
)\?.*?\bcontent_id=
|
)\?.*?\bcontent_id=
|
||||||
)
|
)
|
||||||
(?P<id>n?\d+)|
|
(?P<id>\d+)
|
||||||
(?:[^/]+/)*(?P<path>[^/]+)
|
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
|
_CONTENT_DOMAIN = 'content.mlb.com'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
|
||||||
'md5': 'ff56a598c2cf411a9a38a69709e97079',
|
'md5': '632358dacfceec06bad823b83d21df2d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34698933',
|
'id': '34698933',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Ackley's spectacular catch",
|
'title': "Ackley's spectacular catch",
|
||||||
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
|
||||||
'duration': 66,
|
'duration': 66,
|
||||||
'timestamp': 1405980600,
|
'timestamp': 1405995000,
|
||||||
'upload_date': '20140721',
|
'upload_date': '20140722',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
|
'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
|
||||||
'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
|
'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34496663',
|
'id': '34496663',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Stanton prepares for Derby',
|
'title': 'Stanton prepares for Derby',
|
||||||
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
|
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
|
||||||
'duration': 46,
|
'duration': 46,
|
||||||
'timestamp': 1405105800,
|
'timestamp': 1405120200,
|
||||||
'upload_date': '20140711',
|
'upload_date': '20140711',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
|
'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
|
||||||
'md5': '0e6e73d509321e142409b695eadd541f',
|
'md5': '99bb9176531adc600b90880fb8be9328',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34578115',
|
'id': '34578115',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Cespedes repeats as Derby champ',
|
'title': 'Cespedes repeats as Derby champ',
|
||||||
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
|
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
|
||||||
'duration': 488,
|
'duration': 488,
|
||||||
'timestamp': 1405399936,
|
'timestamp': 1405414336,
|
||||||
'upload_date': '20140715',
|
'upload_date': '20140715',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
|
'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
|
||||||
'md5': 'b8fd237347b844365d74ea61d4245967',
|
'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34577915',
|
'id': '34577915',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bautista on Home Run Derby',
|
'title': 'Bautista on Home Run Derby',
|
||||||
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
|
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
|
||||||
'duration': 52,
|
'duration': 52,
|
||||||
'timestamp': 1405390722,
|
'timestamp': 1405405122,
|
||||||
'upload_date': '20140715',
|
'upload_date': '20140715',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
|
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
|
||||||
'md5': 'aafaf5b0186fee8f32f20508092f8111',
|
'md5': 'e09e37b552351fddbf4d9e699c924d68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '75609783',
|
'id': '75609783',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Must C: Pillar climbs for catch',
|
'title': 'Must C: Pillar climbs for catch',
|
||||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
||||||
'timestamp': 1429124820,
|
'timestamp': 1429139220,
|
||||||
'upload_date': '20150415',
|
'upload_date': '20150415',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -111,7 +105,7 @@ class MLBIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -120,58 +114,7 @@ class MLBIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
|
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract one MLB video referenced by ``url``.

    The video id comes from the ``id`` group of ``_VALID_URL``. When the
    URL carries no id, the page itself is downloaded and the id is searched
    for in its markup. Metadata and media URLs are then read from the
    per-video detail XML.

    Returns an info dict with ``id``, ``title``, ``description``,
    ``duration``, ``timestamp``, ``formats`` and ``thumbnails``.
    """
    match = re.match(self._VALID_URL, url)
    video_id = match.group('id')

    if not video_id:
        # No id in the URL: scrape it from the page instead.
        page_path = match.group('path')
        webpage = self._download_webpage(url, page_path)
        id_patterns = [r'data-video-?id="(\d+)"', r'content_id=(\d+)']
        video_id = self._search_regex(id_patterns, webpage, 'video id')

    # The detail URL embeds the last three characters of the id as
    # separate path components ahead of the full id.
    detail_url = 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' % (
        video_id[-3], video_id[-2], video_id[-1], video_id)
    detail = self._download_xml(detail_url, video_id)

    title = detail.find('./headline').text
    description = detail.find('./big-blurb').text
    duration = parse_duration(detail.find('./duration').text)
    # The last five characters of the date attribute are dropped before
    # parsing (presumably a UTC offset suffix; confirm against the feed).
    timestamp = parse_iso8601(detail.attrib['date'][:-5])

    thumbnails = []
    for scenario_node in detail.findall('./thumbnailScenarios/thumbnailScenario'):
        thumbnails.append({'url': scenario_node.text})

    # Scenario names such as "<vbr>K_<width>X<height>" carry the bitrate
    # and the frame size; other names yield a format without them.
    scenario_re = r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)'
    formats = []
    for url_node in detail.findall('./url'):
        scenario = url_node.attrib['playback_scenario']
        entry = {
            'url': url_node.text,
            'format_id': scenario,
        }
        found = re.search(scenario_re, scenario)
        if found:
            entry['vbr'] = int(found.group('vbr')) * 1000
            entry['width'] = int(found.group('width'))
            entry['height'] = int(found.group('height'))
        formats.append(entry)

    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'thumbnails': thumbnails,
    }
|
|
||||||
|
@ -29,14 +29,13 @@ class NexxIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# movie
|
# movie
|
||||||
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
||||||
'md5': '828cea195be04e66057b846288295ba1',
|
'md5': '31899fd683de49ad46f4ee67e53e83fe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '128907',
|
'id': '128907',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Stiftung Warentest',
|
'title': 'Stiftung Warentest',
|
||||||
'alt_title': 'Wie ein Test abläuft',
|
'alt_title': 'Wie ein Test abläuft',
|
||||||
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
|
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
|
||||||
'release_year': 2013,
|
|
||||||
'creator': 'SPIEGEL TV',
|
'creator': 'SPIEGEL TV',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 2509,
|
'duration': 2509,
|
||||||
@ -62,6 +61,7 @@ class NexxIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
# does not work via arc
|
# does not work via arc
|
||||||
'url': 'nexx:741:1269984',
|
'url': 'nexx:741:1269984',
|
||||||
@ -71,12 +71,26 @@ class NexxIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||||
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||||
'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 607,
|
'duration': 607,
|
||||||
'timestamp': 1518614955,
|
'timestamp': 1518614955,
|
||||||
'upload_date': '20180214',
|
'upload_date': '20180214',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
|
||||||
|
'url': 'nexx:747:1533779',
|
||||||
|
'md5': '6bf6883912b82b7069fb86c2297e9893',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1533779',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Aufregung um ausgebrochene Raubtiere',
|
||||||
|
'alt_title': 'Eifel-Zoo',
|
||||||
|
'description': 'md5:f21375c91c74ad741dcb164c427999d2',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 111,
|
||||||
|
'timestamp': 1527874460,
|
||||||
|
'upload_date': '20180601',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -141,6 +155,139 @@ class NexxIE(InfoExtractor):
|
|||||||
self._handle_error(result)
|
self._handle_error(result)
|
||||||
return result['result']
|
return result['result']
|
||||||
|
|
||||||
|
def _extract_free_formats(self, video, video_id):
    """Collect formats for a video whose ``cdnType`` is ``free``.

    Reads ``video['streamdata']`` and ``video['general']['hash']`` to build
    a manifest URL template for the CDN provider (``ak`` or ``ce``). HLS
    formats are extracted for both providers; ``ce`` additionally yields
    direct HTTP formats and DASH formats.

    Returns the list of format dicts in extraction order (not sorted).
    Raises AssertionError for another CDN type or an unknown provider.
    """
    stream_data = video['streamdata']
    cdn = stream_data['cdnType']
    assert cdn == 'free'

    video_hash = video['general']['hash']

    # Shared path: domain, optional two-level folder hierarchy taken from
    # the reversed zero-padded id, then "<id>/<hash>_".
    path = compat_str(stream_data['originalDomain'])
    if stream_data['applyFolderHierarchy'] == 1:
        reversed_id = ('%04d' % int(video_id))[::-1]
        path += '/%s/%s' % (reversed_id[0:2], reversed_id[2:4])
    path += '/%s/%s_' % (video_id, video_hash)

    # The leading %s is later filled with a CDN path from stream_data.
    manifest_tpl = 'http://%s' + path
    distribution = stream_data['azureFileDistribution'].split(',')
    cdn_provider = stream_data['cdnProvider']

    def bitrate_suffix(bitrate):
        # "_<bitrate>" is appended only when the azure structure applies.
        if stream_data['applyAzureStructure'] == 1:
            return '_%s' % bitrate
        return ''

    formats = []
    if cdn_provider == 'ak':
        # Comma-separated rendition list followed by the master manifest
        # name; the trailing %s receives the manifest extension.
        renditions = []
        for item in distribution:
            fields = item.split(':')
            renditions.append(fields[1] + bitrate_suffix(int(fields[0])) + ',')
        manifest_tpl += ',' + ''.join(renditions) + '.mp4.csmil/master.%s'
    elif cdn_provider == 'ce':
        # The last path segment ("<hash>_") is a file name prefix here,
        # not a directory.
        segments = manifest_tpl.split('/')
        file_prefix = segments.pop()
        manifest_tpl = '/'.join(segments)
        http_base = manifest_tpl % stream_data['cdnPathHTTP']

        stream_specs = []
        for item in distribution:
            fields = item.split(':')
            tbr = int(fields[0])
            filename = '%s%s%s.mp4' % (file_prefix, fields[1], bitrate_suffix(tbr))
            fmt = {
                'url': http_base + '/' + filename,
                'format_id': '%s-http-%d' % (cdn, tbr),
                'tbr': tbr,
            }
            dimensions = fields[1].split('x')
            if len(dimensions) == 2:
                fmt['width'] = int_or_none(dimensions[0])
                fmt['height'] = int_or_none(dimensions[1])
            formats.append(fmt)
            stream_specs.append(filename + ':%s' % (tbr * 1000))

        # Every rendition is a video stream; the audio stream reuses the
        # file name of the last rendition.
        manifest_tpl += (
            '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
            + ','.join(stream_specs)
            + '&audiostream=' + stream_specs[-1].split(':')[0])

        formats.extend(self._extract_mpd_formats(
            manifest_tpl % (stream_data['cdnPathDASH'], 'mpd'), video_id,
            mpd_id='%s-dash' % cdn, fatal=False))
    else:
        assert False

    formats.extend(self._extract_m3u8_formats(
        manifest_tpl % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
        entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))

    return formats
|
||||||
|
|
||||||
|
def _extract_azure_formats(self, video, video_id):
    """Collect formats for a video whose ``cdnType`` is ``azure``.

    Builds the manifest URL from the azure locator and a CDN base host,
    extracts HLS, DASH and ISM formats from it, and then adds one
    progressive HTTP format per valid ``azureFileDistribution`` entry.

    Returns the list of format dicts in extraction order (not sorted).
    Raises AssertionError when the CDN type is not ``azure``.
    """
    stream_data = video['streamdata']
    cdn = stream_data['cdnType']
    assert cdn == 'azure'

    azure_locator = stream_data['azureLocator']

    def get_cdn_shield_base(shield_type='', static=False):
        # A configured shield host wins; the plain HTTP key is tried
        # before the HTTPS one.
        for scheme_suffix in ('', 's'):
            shield_key = 'cdnShield%sHTTP%s' % (shield_type, scheme_suffix.upper())
            shield_host = stream_data.get(shield_key)
            if shield_host:
                return 'http%s://%s' % (scheme_suffix, shield_host)
        # No shield configured: derive the host from the account name.
        account_name = stream_data['azureAccount']
        if 'fb' in account_name:
            prefix = 'df' if static else 'f'
        else:
            prefix = 'd' if static else 'p'
        account = int(account_name.replace('nexxplayplus', '').replace('nexxplayfb', ''))
        return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)

    language = video['general'].get('language_raw') or ''

    stream_base = get_cdn_shield_base()
    # A comma in the raw language value selects the "_manifest" variant.
    manifest_suffix = '_manifest' if ',' in language else ''
    # The trailing %s is filled with the per-protocol format selector.
    manifest_tpl = '%s%s/%s_src%s.ism/Manifest' % (
        stream_base, azure_locator, video_id, manifest_suffix) + '%s'

    protection_token = try_get(
        video, lambda x: x['protectiondata']['token'], compat_str)
    if protection_token:
        manifest_tpl += '?hdnts=%s' % protection_token

    formats = self._extract_m3u8_formats(
        manifest_tpl % '(format=m3u8-aapl)',
        video_id, 'mp4', 'm3u8_native',
        m3u8_id='%s-hls' % cdn, fatal=False)
    formats.extend(self._extract_mpd_formats(
        manifest_tpl % '(format=mpd-time-csf)',
        video_id, mpd_id='%s-dash' % cdn, fatal=False))
    formats.extend(self._extract_ism_formats(
        manifest_tpl % '', video_id, ism_id='%s-mss' % cdn, fatal=False))

    progressive_base = get_cdn_shield_base('Prog', True)
    file_distribution = stream_data.get('azureFileDistribution')
    if not file_distribution:
        return formats

    # Entries are "<tbr>:<width>x<height>" pairs; malformed entries and
    # entries without a usable bitrate are skipped.
    for item in file_distribution.split(','):
        fields = item.split(':')
        if len(fields) != 2:
            continue
        tbr = int_or_none(fields[0])
        if not tbr:
            continue
        fmt = {
            'url': '%s%s/%s_src_%s_%d.mp4' % (
                progressive_base, azure_locator, video_id, fields[1], tbr),
            'format_id': '%s-http-%d' % (cdn, tbr),
            'tbr': tbr,
        }
        dimensions = fields[1].split('x')
        if len(dimensions) == 2:
            fmt['width'] = int_or_none(dimensions[0])
            fmt['height'] = int_or_none(dimensions[1])
        formats.append(fmt)

    return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||||
@ -220,72 +367,15 @@ class NexxIE(InfoExtractor):
|
|||||||
general = video['general']
|
general = video['general']
|
||||||
title = general['title']
|
title = general['title']
|
||||||
|
|
||||||
stream_data = video['streamdata']
|
cdn = video['streamdata']['cdnType']
|
||||||
language = general.get('language_raw') or ''
|
|
||||||
|
|
||||||
# TODO: reverse more cdns
|
if cdn == 'azure':
|
||||||
|
formats = self._extract_azure_formats(video, video_id)
|
||||||
cdn = stream_data['cdnType']
|
elif cdn == 'free':
|
||||||
assert cdn == 'azure'
|
formats = self._extract_free_formats(video, video_id)
|
||||||
|
else:
|
||||||
azure_locator = stream_data['azureLocator']
|
# TODO: reverse more cdns
|
||||||
|
assert False
|
||||||
def get_cdn_shield_base(shield_type='', static=False):
|
|
||||||
for secure in ('', 's'):
|
|
||||||
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
|
|
||||||
if cdn_shield:
|
|
||||||
return 'http%s://%s' % (secure, cdn_shield)
|
|
||||||
else:
|
|
||||||
if 'fb' in stream_data['azureAccount']:
|
|
||||||
prefix = 'df' if static else 'f'
|
|
||||||
else:
|
|
||||||
prefix = 'd' if static else 'p'
|
|
||||||
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
|
|
||||||
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
|
|
||||||
|
|
||||||
azure_stream_base = get_cdn_shield_base()
|
|
||||||
is_ml = ',' in language
|
|
||||||
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
|
|
||||||
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
|
|
||||||
|
|
||||||
protection_token = try_get(
|
|
||||||
video, lambda x: x['protectiondata']['token'], compat_str)
|
|
||||||
if protection_token:
|
|
||||||
azure_manifest_url += '?hdnts=%s' % protection_token
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
azure_manifest_url % '(format=m3u8-aapl)',
|
|
||||||
video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id='%s-hls' % cdn, fatal=False)
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
azure_manifest_url % '(format=mpd-time-csf)',
|
|
||||||
video_id, mpd_id='%s-dash' % cdn, fatal=False))
|
|
||||||
formats.extend(self._extract_ism_formats(
|
|
||||||
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
|
|
||||||
|
|
||||||
azure_progressive_base = get_cdn_shield_base('Prog', True)
|
|
||||||
azure_file_distribution = stream_data.get('azureFileDistribution')
|
|
||||||
if azure_file_distribution:
|
|
||||||
fds = azure_file_distribution.split(',')
|
|
||||||
if fds:
|
|
||||||
for fd in fds:
|
|
||||||
ss = fd.split(':')
|
|
||||||
if len(ss) == 2:
|
|
||||||
tbr = int_or_none(ss[0])
|
|
||||||
if tbr:
|
|
||||||
f = {
|
|
||||||
'url': '%s%s/%s_src_%s_%d.mp4' % (
|
|
||||||
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
|
|
||||||
'format_id': '%s-http-%d' % (cdn, tbr),
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
width_height = ss[1].split('x')
|
|
||||||
if len(width_height) == 2:
|
|
||||||
f.update({
|
|
||||||
'width': int_or_none(width_height[0]),
|
|
||||||
'height': int_or_none(width_height[1]),
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -1,18 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_urlparse,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_str,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@ -20,236 +12,77 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NHLBaseInfoExtractor(InfoExtractor):
|
class NHLBaseIE(InfoExtractor):
|
||||||
@staticmethod
|
def _real_extract(self, url):
|
||||||
def _fix_json(json_string):
|
site, tmp_id = re.match(self._VALID_URL, url).groups()
|
||||||
return json_string.replace('\\\'', '\'')
|
video_data = self._download_json(
|
||||||
|
'https://%s/%s/%sid/v1/%s/details/web-v1.json'
|
||||||
|
% (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
|
||||||
|
if video_data.get('type') != 'video':
|
||||||
|
video_data = video_data['media']
|
||||||
|
video = video_data.get('video')
|
||||||
|
if video:
|
||||||
|
video_data = video
|
||||||
|
else:
|
||||||
|
videos = video_data.get('videos')
|
||||||
|
if videos:
|
||||||
|
video_data = videos[0]
|
||||||
|
|
||||||
def _real_extract_video(self, video_id):
|
video_id = compat_str(video_data['id'])
|
||||||
vid_parts = video_id.split(',')
|
title = video_data['title']
|
||||||
if len(vid_parts) == 3:
|
|
||||||
video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
|
|
||||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
|
||||||
data = self._download_json(
|
|
||||||
json_url, video_id, transform_source=self._fix_json)
|
|
||||||
return self._extract_video(data[0])
|
|
||||||
|
|
||||||
def _extract_video(self, info):
|
formats = []
|
||||||
video_id = info['id']
|
for playback in video_data.get('playbacks', []):
|
||||||
self.report_extraction(video_id)
|
playback_url = playback.get('url')
|
||||||
|
if not playback_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(playback_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
playback_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id=playback.get('name', 'hls'), fatal=False)
|
||||||
|
self._check_formats(m3u8_formats, video_id)
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
else:
|
||||||
|
height = int_or_none(playback.get('height'))
|
||||||
|
formats.append({
|
||||||
|
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
|
||||||
|
'url': playback_url,
|
||||||
|
'width': int_or_none(playback.get('width')),
|
||||||
|
'height': height,
|
||||||
|
'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
initial_video_url = info['publishPoint']
|
thumbnails = []
|
||||||
if info['formats'] == '1':
|
cuts = video_data.get('image', {}).get('cuts') or []
|
||||||
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
|
if isinstance(cuts, dict):
|
||||||
filename, ext = os.path.splitext(parsed_url.path)
|
cuts = cuts.values()
|
||||||
path = '%s_sd%s' % (filename, ext)
|
for thumbnail_data in cuts:
|
||||||
data = compat_urllib_parse_urlencode({
|
thumbnail_url = thumbnail_data.get('src')
|
||||||
'type': 'fvod',
|
if not thumbnail_url:
|
||||||
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int_or_none(thumbnail_data.get('width')),
|
||||||
|
'height': int_or_none(thumbnail_data.get('height')),
|
||||||
})
|
})
|
||||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
|
||||||
path_doc = self._download_xml(
|
|
||||||
path_url, video_id, 'Downloading final video url')
|
|
||||||
video_url = path_doc.find('path').text
|
|
||||||
else:
|
|
||||||
video_url = initial_video_url
|
|
||||||
|
|
||||||
join = compat_urlparse.urljoin
|
|
||||||
ret = {
|
|
||||||
'id': video_id,
|
|
||||||
'title': info['name'],
|
|
||||||
'url': video_url,
|
|
||||||
'description': info['description'],
|
|
||||||
'duration': int(info['duration']),
|
|
||||||
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
|
|
||||||
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
|
|
||||||
}
|
|
||||||
if video_url.startswith('rtmp:'):
|
|
||||||
mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
|
|
||||||
ret.update({
|
|
||||||
'tc_url': mobj.group('tc_url'),
|
|
||||||
'play_path': mobj.group('play_path'),
|
|
||||||
'app': mobj.group('app'),
|
|
||||||
'no_resume': True,
|
|
||||||
})
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
    # Extractor for single videos on the video.*.nhl.com videocenter
    # (console, embed and bare videocenter URLs).
    IE_NAME = 'nhl.com:videocenter'
    # The id may be given through the "id", "hlg" or "playlist" query
    # parameter; an optional team subdomain is captured as "team".
    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'

    _TESTS = [{
        # id preceded by a second "?" instead of "&"
        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
        'md5': 'db704a4ea09e8d3988c85e36cc892d09',
        'info_dict': {
            'id': '453614',
            'ext': 'mp4',
            'title': 'Quick clip: Weise 4-3 goal vs Flames',
            'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
            'duration': 18,
            'upload_date': '20131006',
        },
    }, {
        # non-numeric id
        'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
        'md5': 'd22e82bc592f52d37d24b03531ee9696',
        'info_dict': {
            'id': '2014020024-628-h',
            'ext': 'mp4',
            'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
            'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
            'duration': 0,
            'upload_date': '20141011',
        },
    }, {
        # flv result
        'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
        'md5': 'c78fc64ea01777e426cfc202b746c825',
        'info_dict': {
            'id': '58665',
            'ext': 'flv',
            'title': 'Classic Game In Six - April 22, 1979',
            'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
            'duration': 400,
            'upload_date': '20100129'
        },
    }, {
        'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
        'only_matching': True,
    }, {
        'url': 'http://video.nhl.com/videocenter/?id=736722',
        'only_matching': True,
    }, {
        # "hlg" parameter with a comma-separated three-part id
        'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
        'md5': '076fcb88c255154aacbf0a7accc3f340',
        'info_dict': {
            'id': '2014020299-X-h',
            'ext': 'mp4',
            'title': 'Penguins at Islanders / Game Highlights',
            'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
            'duration': 268,
            'upload_date': '20141122',
        }
    }, {
        'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
        'info_dict': {
            'id': '691469',
            'ext': 'mp4',
            'title': 'RAW | Craig MacTavish Full Press Conference',
            'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
            'upload_date': '20141205',
        },
        'params': {
            'skip_download': True,  # Requires rtmpdump
        }
    }, {
        'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Take the id from ``url`` and delegate to ``_real_extract_video``."""
        video_id = self._match_id(url)
        return self._real_extract_video(video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class NHLNewsIE(NHLBaseInfoExtractor):
    # Extractor for nhl.com news articles that embed a videocenter video.
    IE_NAME = 'nhl.com:news'
    IE_DESC = 'NHL news'
    # Matches /ice/news.htm(l) and /club/news.htm(l) pages on any nhl.com
    # subdomain; the article id is the "id" query parameter.
    _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'

    _TESTS = [{
        # the resulting video id differs from the article id in the URL
        'url': 'http://www.nhl.com/ice/news.htm?id=750727',
        'md5': '4b3d1262e177687a3009937bd9ec0be8',
        'info_dict': {
            'id': '736722',
            'ext': 'mp4',
            'title': 'Cal Clutterbuck has been fined $2,000',
            'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
            'duration': 37,
            'upload_date': '20150128',
        },
    }, {
        # iframe embed
        'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
        'md5': '9f663d1c006c90ac9fb82777d4294e12',
        'info_dict': {
            'id': '836127',
            'ext': 'mp4',
            'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
            'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
            'duration': 93,
            'upload_date': '20150923',
        },
    }]

    def _real_extract(self, url):
        """Find the embedded video id on the news page and extract that video.

        The page is downloaded and searched with three patterns in turn:
        a ``pVid<digits>`` token, an ``nlid: '<digits>'`` assignment, and a
        videocenter embed iframe carrying a ``playlist`` parameter. The id
        found is handed to ``_real_extract_video``.
        """
        news_id = self._match_id(url)
        webpage = self._download_webpage(url, news_id)
        video_id = self._search_regex(
            [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
             r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
            webpage, 'video id')
        return self._real_extract_video(video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
|
|
||||||
IE_NAME = 'nhl.com:videocenter:category'
|
|
||||||
IE_DESC = 'NHL videocenter category'
|
|
||||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '999',
|
|
||||||
'title': 'Highlights',
|
|
||||||
},
|
|
||||||
'playlist_count': 12,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
team = mobj.group('team')
|
|
||||||
webpage = self._download_webpage(url, team)
|
|
||||||
cat_id = self._search_regex(
|
|
||||||
[r'var defaultCatId = "(.+?)";',
|
|
||||||
r'{statusIndex:0,index:0,.*?id:(.*?),'],
|
|
||||||
webpage, 'category id')
|
|
||||||
playlist_title = self._html_search_regex(
|
|
||||||
r'tab0"[^>]*?>(.*?)</td>',
|
|
||||||
webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
|
|
||||||
|
|
||||||
data = compat_urllib_parse_urlencode({
|
|
||||||
'cid': cat_id,
|
|
||||||
# This is the default value
|
|
||||||
'count': 12,
|
|
||||||
'ptrs': 3,
|
|
||||||
'format': 'json',
|
|
||||||
})
|
|
||||||
path = '/videocenter/servlets/browse?' + data
|
|
||||||
request_url = compat_urlparse.urljoin(url, path)
|
|
||||||
response = self._download_webpage(request_url, playlist_title)
|
|
||||||
response = self._fix_json(response)
|
|
||||||
if not response.strip():
|
|
||||||
self._downloader.report_warning('Got an empty response, trying '
|
|
||||||
'adding the "newvideos" parameter')
|
|
||||||
response = self._download_webpage(request_url + '&newvideos=true',
|
|
||||||
playlist_title)
|
|
||||||
response = self._fix_json(response)
|
|
||||||
videos = json.loads(response)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'id': video_id,
|
||||||
'title': playlist_title,
|
'title': title,
|
||||||
'id': cat_id,
|
'description': video_data.get('description'),
|
||||||
'entries': [self._extract_video(v) for v in videos],
|
'timestamp': parse_iso8601(video_data.get('date')),
|
||||||
|
'duration': parse_duration(video_data.get('duration')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NHLIE(InfoExtractor):
|
class NHLIE(NHLBaseIE):
|
||||||
IE_NAME = 'nhl.com'
|
IE_NAME = 'nhl.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
|
||||||
_SITES_MAP = {
|
_CONTENT_DOMAIN = 'nhl.bamcontent.com'
|
||||||
'nhl': 'nhl',
|
|
||||||
'wch2016': 'wch',
|
|
||||||
}
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# type=video
|
# type=video
|
||||||
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
|
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
|
||||||
@ -293,59 +126,3 @@ class NHLIE(InfoExtractor):
|
|||||||
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
|
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a video from an nhl.com or wch2016.com content page.

    Downloads the ``details/web-v1.json`` document for the id in ``url``.
    When that document is an article, its ``media`` entry is used as the
    video record. Formats are built from the ``playbacks`` list and
    thumbnails from the ``image.cuts`` mapping.

    Returns an info dict with ``id``, ``title``, ``description``,
    ``timestamp``, ``duration``, ``thumbnails`` and ``formats``.
    """
    match = re.match(self._VALID_URL, url)
    tmp_id = match.group('id')
    site = match.group('site')

    details_url = 'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json' % (
        self._SITES_MAP[site], tmp_id)
    video_data = self._download_json(details_url, tmp_id)
    if video_data.get('type') == 'article':
        # Articles wrap the actual video record in "media".
        video_data = video_data['media']

    video_id = compat_str(video_data['id'])
    title = video_data['title']

    formats = []
    for playback in video_data.get('playbacks', []):
        playback_url = playback.get('url')
        if not playback_url:
            continue
        if determine_ext(playback_url) == 'm3u8':
            hls_formats = self._extract_m3u8_formats(
                playback_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id=playback.get('name', 'hls'), fatal=False)
            self._check_formats(hls_formats, video_id)
            formats.extend(hls_formats)
            continue
        height = int_or_none(playback.get('height'))
        # Used only when the playback has no name of its own.
        fallback_format_id = 'http' + ('-%dp' % height if height else '')
        formats.append({
            'format_id': playback.get('name', fallback_format_id),
            'url': playback_url,
            'width': int_or_none(playback.get('width')),
            'height': height,
        })
    self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))

    thumbnails = []
    cuts = video_data.get('image', {}).get('cuts', {})
    for thumbnail_id, thumbnail_data in cuts.items():
        thumbnail_url = thumbnail_data.get('src')
        if thumbnail_url:
            thumbnails.append({
                'id': thumbnail_id,
                'url': thumbnail_url,
                'width': int_or_none(thumbnail_data.get('width')),
                'height': int_or_none(thumbnail_data.get('height')),
            })

    return {
        'id': video_id,
        'title': title,
        'description': video_data.get('description'),
        'timestamp': parse_iso8601(video_data.get('date')),
        'duration': parse_duration(video_data.get('duration')),
        'thumbnails': thumbnails,
        'formats': formats,
    }
|
|
||||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
@ -36,8 +36,8 @@ class NPOIE(NPOBaseIE):
|
|||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
npo\.nl/(?:[^/]+/)*|
|
||||||
ntr\.nl/(?:[^/]+/){2,}|
|
(?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
|
||||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
||||||
)
|
)
|
||||||
@ -160,8 +160,20 @@ class NPOIE(NPOBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://npo.nl/KN_1698996',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    URLs accepted by the live, radio or radio-fragment extractors are
    rejected here; every other URL is checked by the inherited test.
    """
    for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE):
        if ie.suitable(url):
            return False
    return super(NPOIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self._get_info(video_id)
|
return self._get_info(video_id)
|
||||||
@ -389,7 +401,7 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
|
|
||||||
class NPORadioIE(InfoExtractor):
|
class NPORadioIE(InfoExtractor):
|
||||||
IE_NAME = 'npo.nl:radio'
|
IE_NAME = 'npo.nl:radio'
|
||||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.npo.nl/radio/radio-1',
|
'url': 'http://www.npo.nl/radio/radio-1',
|
||||||
@ -404,6 +416,10 @@ class NPORadioIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _html_get_attribute_regex(attribute):
|
def _html_get_attribute_regex(attribute):
|
||||||
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
||||||
|
@ -16,12 +16,22 @@ from ..utils import (
|
|||||||
class NRKBaseIE(InfoExtractor):
|
class NRKBaseIE(InfoExtractor):
|
||||||
_GEO_COUNTRIES = ['NO']
|
_GEO_COUNTRIES = ['NO']
|
||||||
|
|
||||||
|
_api_host = None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
data = self._download_json(
|
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||||
'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
|
|
||||||
video_id, 'Downloading mediaelement JSON')
|
for api_host in api_hosts:
|
||||||
|
data = self._download_json(
|
||||||
|
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||||
|
video_id, 'Downloading mediaelement JSON',
|
||||||
|
fatal=api_host == api_hosts[-1])
|
||||||
|
if not data:
|
||||||
|
continue
|
||||||
|
self._api_host = api_host
|
||||||
|
break
|
||||||
|
|
||||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||||
video_id = data.get('id') or video_id
|
video_id = data.get('id') or video_id
|
||||||
@ -191,7 +201,7 @@ class NRKIE(NRKBaseIE):
|
|||||||
)
|
)
|
||||||
(?P<id>[^?#&]+)
|
(?P<id>[^?#&]+)
|
||||||
'''
|
'''
|
||||||
_API_HOST = 'v8-psapi.nrk.no'
|
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video
|
# video
|
||||||
'url': 'http://www.nrk.no/video/PS*150533',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
@ -237,8 +247,7 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
(?:/\d{2}-\d{2}-\d{4})?
|
(?:/\d{2}-\d{2}-\d{4})?
|
||||||
(?:\#del=(?P<part_id>\d+))?
|
(?:\#del=(?P<part_id>\d+))?
|
||||||
''' % _EPISODE_RE
|
''' % _EPISODE_RE
|
||||||
_API_HOST = 'psapi-we.nrk.no'
|
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
||||||
|
@ -243,7 +243,7 @@ class PhantomJSwrapper(object):
|
|||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||||
@ -301,6 +301,16 @@ class OpenloadIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
|
'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.win/f/kUEfGclsU9o',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.download/f/kUEfGclsU9o',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Its title has not got its extension but url has it
|
||||||
|
'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
@ -362,8 +372,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
# Seems all videos have extensions in their titles
|
'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
|
||||||
'ext': determine_ext(title, 'mp4'),
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
|
@ -360,6 +360,21 @@ class PBSIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2365936247',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Antiques Roadshow - Indianapolis, Hour 2',
|
||||||
|
'description': 'md5:524b32249db55663e7231b6b8d1671a2',
|
||||||
|
'duration': 3180,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -422,6 +437,7 @@ class PBSIE(InfoExtractor):
|
|||||||
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
||||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||||
|
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||||
]
|
]
|
||||||
|
|
||||||
media_id = self._search_regex(
|
media_id = self._search_regex(
|
||||||
|
@ -53,7 +53,8 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
'format_id': compat_str(abr),
|
'format_id': compat_str(abr),
|
||||||
'abr': abr,
|
'abr': abr,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
} for abr in (96, 128, 256)]
|
} for abr in (96, 128, 192, 256)]
|
||||||
|
self._check_formats(formats, episode_id)
|
||||||
|
|
||||||
description = clean_html(episode.get('longTeaser'))
|
description = clean_html(episode.get('longTeaser'))
|
||||||
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
|
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
|
||||||
|
@ -74,7 +74,14 @@ class SafariBaseIE(InfoExtractor):
|
|||||||
class SafariIE(SafariBaseIE):
|
class SafariIE(SafariBaseIE):
|
||||||
IE_NAME = 'safari'
|
IE_NAME = 'safari'
|
||||||
IE_DESC = 'safaribooksonline.com online video'
|
IE_DESC = 'safaribooksonline.com online video'
|
||||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?safaribooksonline\.com/
|
||||||
|
(?:
|
||||||
|
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
||||||
|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
||||||
@ -94,22 +101,41 @@ class SafariIE(SafariBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
|
'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_PARTNER_ID = '1926081'
|
||||||
|
_UICONF_ID = '29375172'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
reference_id = mobj.group('reference_id')
|
||||||
reference_id = self._search_regex(
|
if reference_id:
|
||||||
r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
video_id = reference_id
|
||||||
webpage, 'kaltura reference id', group='id')
|
partner_id = self._PARTNER_ID
|
||||||
partner_id = self._search_regex(
|
ui_id = self._UICONF_ID
|
||||||
r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
else:
|
||||||
webpage, 'kaltura widget id', group='id')
|
video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
|
||||||
ui_id = self._search_regex(
|
|
||||||
r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||||
webpage, 'kaltura uiconf id', group='id')
|
|
||||||
|
mobj = re.match(self._VALID_URL, urlh.geturl())
|
||||||
|
reference_id = mobj.group('reference_id')
|
||||||
|
if not reference_id:
|
||||||
|
reference_id = self._search_regex(
|
||||||
|
r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'kaltura reference id', group='id')
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'kaltura widget id', default=self._PARTNER_ID,
|
||||||
|
group='id')
|
||||||
|
ui_id = self._search_regex(
|
||||||
|
r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'kaltura uiconf id', default=self._UICONF_ID,
|
||||||
|
group='id')
|
||||||
|
|
||||||
query = {
|
query = {
|
||||||
'wid': '_%s' % partner_id,
|
'wid': '_%s' % partner_id,
|
||||||
@ -159,10 +185,15 @@ class SafariCourseIE(SafariBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
|
(?:www\.)?safaribooksonline\.com/
|
||||||
|
(?:
|
||||||
|
library/view/[^/]+|
|
||||||
|
api/v1/book|
|
||||||
|
videos/[^/]+
|
||||||
|
)|
|
||||||
techbus\.safaribooksonline\.com
|
techbus\.safaribooksonline\.com
|
||||||
)
|
)
|
||||||
/(?P<id>[^/]+)/?(?:[#?]|$)
|
/(?P<id>[^/]+)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -179,8 +210,16 @@ class SafariCourseIE(SafariBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://techbus.safaribooksonline.com/9780134426365',
|
'url': 'http://techbus.safaribooksonline.com/9780134426365',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
|
||||||
|
else super(SafariCourseIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
course_id = self._match_id(url)
|
course_id = self._match_id(url)
|
||||||
|
|
||||||
|
@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .turner import TurnerBaseIE
|
from .turner import TurnerBaseIE
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_parse_qs,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_data = self._parse_json(self._search_regex(
|
drupal_settings = self._parse_json(self._search_regex(
|
||||||
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
|
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
|
||||||
webpage, 'drupal setting'), display_id)['turner_playlist'][0]
|
webpage, 'drupal setting'), display_id)
|
||||||
|
video_data = drupal_settings['turner_playlist'][0]
|
||||||
|
|
||||||
media_id = video_data['mediaID']
|
media_id = video_data['mediaID']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
|
||||||
|
drupal_settings['ngtv_token_url']).query)
|
||||||
|
|
||||||
streams_data = self._download_json(
|
info = self._extract_ngtv_info(
|
||||||
'http://medium.ngtv.io/media/%s/tv' % media_id,
|
media_id, tokenizer_query, {
|
||||||
media_id)['media']['tv']
|
'url': url,
|
||||||
duration = None
|
'site_name': site[:3].upper(),
|
||||||
chapters = []
|
'auth_required': video_data.get('authRequired') == '1',
|
||||||
formats = []
|
})
|
||||||
for supported_type in ('unprotected', 'bulkaes'):
|
|
||||||
stream_data = streams_data.get(supported_type, {})
|
|
||||||
m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
|
|
||||||
if not m3u8_url:
|
|
||||||
continue
|
|
||||||
if stream_data.get('playlistProtection') == 'spe':
|
|
||||||
m3u8_url = self._add_akamai_spe_token(
|
|
||||||
'http://token.vgtf.net/token/token_spe',
|
|
||||||
m3u8_url, media_id, {
|
|
||||||
'url': url,
|
|
||||||
'site_name': site[:3].upper(),
|
|
||||||
'auth_required': video_data.get('authRequired') == '1',
|
|
||||||
})
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
|
||||||
|
|
||||||
duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
|
|
||||||
|
|
||||||
if not chapters:
|
|
||||||
for chapter in stream_data.get('contentSegments', []):
|
|
||||||
start_time = float_or_none(chapter.get('start'))
|
|
||||||
duration = float_or_none(chapter.get('duration'))
|
|
||||||
if start_time is None or duration is None:
|
|
||||||
continue
|
|
||||||
chapters.append({
|
|
||||||
'start_time': start_time,
|
|
||||||
'end_time': start_time + duration,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for image_id, image in video_data.get('images', {}).items():
|
for image_id, image in video_data.get('images', {}).items():
|
||||||
@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
|
|||||||
})
|
})
|
||||||
thumbnails.append(i)
|
thumbnails.append(i)
|
||||||
|
|
||||||
return {
|
info.update({
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
|
'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
|
||||||
'duration': duration,
|
'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
|
||||||
'timestamp': int_or_none(video_data.get('created')),
|
'timestamp': int_or_none(video_data.get('created')),
|
||||||
'season_number': int_or_none(video_data.get('season')),
|
'season_number': int_or_none(video_data.get('season')),
|
||||||
'episode_number': int_or_none(video_data.get('episode')),
|
'episode_number': int_or_none(video_data.get('episode')),
|
||||||
'cahpters': chapters,
|
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
})
|
||||||
}
|
return info
|
||||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .turner import TurnerBaseIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TeamcocoIE(InfoExtractor):
|
class TeamcocoIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
|
_VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -110,6 +110,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
name
|
name
|
||||||
}
|
}
|
||||||
duration
|
duration
|
||||||
|
turnerMediaId
|
||||||
|
turnerMediaAuthToken
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
... on NotFoundSlug {
|
... on NotFoundSlug {
|
||||||
@ -123,53 +125,65 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
record = response['record']
|
record = response['record']
|
||||||
video_id = record['id']
|
video_id = record['id']
|
||||||
|
|
||||||
video_sources = self._graphql_call('''{
|
info = {
|
||||||
%s(id: "%s") {
|
|
||||||
src
|
|
||||||
}
|
|
||||||
}''', 'RecordVideoSource', video_id) or {}
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
|
|
||||||
for format_id, src in video_sources.get('src', {}).items():
|
|
||||||
if not isinstance(src, dict):
|
|
||||||
continue
|
|
||||||
src_url = src.get('src')
|
|
||||||
if not src_url:
|
|
||||||
continue
|
|
||||||
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
|
|
||||||
if format_id == 'hls' or ext == 'm3u8':
|
|
||||||
# compat_urllib_parse.urljoin does not work here
|
|
||||||
if src_url.startswith('/'):
|
|
||||||
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
|
||||||
else:
|
|
||||||
if src_url.startswith('/mp4:protected/'):
|
|
||||||
# TODO Correct extraction for these files
|
|
||||||
continue
|
|
||||||
tbr = int_or_none(self._search_regex(
|
|
||||||
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
|
|
||||||
|
|
||||||
formats.append({
|
|
||||||
'url': src_url,
|
|
||||||
'ext': ext,
|
|
||||||
'tbr': tbr,
|
|
||||||
'format_id': format_id,
|
|
||||||
'quality': get_quality(format_id),
|
|
||||||
})
|
|
||||||
if not formats:
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
record['file']['url'], video_id, 'mp4', fatal=False)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'formats': formats,
|
|
||||||
'title': record['title'],
|
'title': record['title'],
|
||||||
'thumbnail': record.get('thumb', {}).get('preview'),
|
'thumbnail': record.get('thumb', {}).get('preview'),
|
||||||
'description': record.get('teaser'),
|
'description': record.get('teaser'),
|
||||||
'duration': parse_duration(record.get('duration')),
|
'duration': parse_duration(record.get('duration')),
|
||||||
'timestamp': parse_iso8601(record.get('publishOn')),
|
'timestamp': parse_iso8601(record.get('publishOn')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
media_id = record.get('turnerMediaId')
|
||||||
|
if media_id:
|
||||||
|
self._initialize_geo_bypass({
|
||||||
|
'countries': ['US'],
|
||||||
|
})
|
||||||
|
info.update(self._extract_ngtv_info(media_id, {
|
||||||
|
'accessToken': record['turnerMediaAuthToken'],
|
||||||
|
'accessTokenType': 'jws',
|
||||||
|
}))
|
||||||
|
else:
|
||||||
|
video_sources = self._graphql_call('''{
|
||||||
|
%s(id: "%s") {
|
||||||
|
src
|
||||||
|
}
|
||||||
|
}''', 'RecordVideoSource', video_id) or {}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
|
||||||
|
for format_id, src in video_sources.get('src', {}).items():
|
||||||
|
if not isinstance(src, dict):
|
||||||
|
continue
|
||||||
|
src_url = src.get('src')
|
||||||
|
if not src_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
|
||||||
|
if format_id == 'hls' or ext == 'm3u8':
|
||||||
|
# compat_urllib_parse.urljoin does not work here
|
||||||
|
if src_url.startswith('/'):
|
||||||
|
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
if src_url.startswith('/mp4:protected/'):
|
||||||
|
# TODO Correct extraction for these files
|
||||||
|
continue
|
||||||
|
tbr = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': src_url,
|
||||||
|
'ext': ext,
|
||||||
|
'tbr': tbr,
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': get_quality(format_id),
|
||||||
|
})
|
||||||
|
if not formats:
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
record['file']['url'], video_id, 'mp4', fatal=False)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
|
||||||
|
return info
|
||||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
xpath_attr,
|
xpath_attr,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
|
|||||||
def _extract_timestamp(self, video_data):
|
def _extract_timestamp(self, video_data):
|
||||||
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
|
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
|
||||||
|
|
||||||
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
|
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
|
||||||
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
|
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
|
||||||
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
|
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
|
||||||
if not token:
|
if not token:
|
||||||
query = {
|
query = {
|
||||||
'path': secure_path,
|
'path': secure_path,
|
||||||
'videoId': content_id,
|
|
||||||
}
|
}
|
||||||
|
if custom_tokenizer_query:
|
||||||
|
query.update(custom_tokenizer_query)
|
||||||
|
else:
|
||||||
|
query['videoId'] = content_id
|
||||||
if ap_data.get('auth_required'):
|
if ap_data.get('auth_required'):
|
||||||
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
|
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
|
||||||
auth = self._download_xml(
|
auth = self._download_xml(
|
||||||
@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
|
|||||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
|
||||||
|
streams_data = self._download_json(
|
||||||
|
'http://medium.ngtv.io/media/%s/tv' % media_id,
|
||||||
|
media_id)['media']['tv']
|
||||||
|
duration = None
|
||||||
|
chapters = []
|
||||||
|
formats = []
|
||||||
|
for supported_type in ('unprotected', 'bulkaes'):
|
||||||
|
stream_data = streams_data.get(supported_type, {})
|
||||||
|
m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
|
||||||
|
if not m3u8_url:
|
||||||
|
continue
|
||||||
|
if stream_data.get('playlistProtection') == 'spe':
|
||||||
|
m3u8_url = self._add_akamai_spe_token(
|
||||||
|
'http://token.ngtv.io/token/token_spe',
|
||||||
|
m3u8_url, media_id, ap_data or {}, tokenizer_query)
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
duration = float_or_none(stream_data.get('totalRuntime'))
|
||||||
|
|
||||||
|
if not chapters:
|
||||||
|
for chapter in stream_data.get('contentSegments', []):
|
||||||
|
start_time = float_or_none(chapter.get('start'))
|
||||||
|
chapter_duration = float_or_none(chapter.get('duration'))
|
||||||
|
if start_time is None or chapter_duration is None:
|
||||||
|
continue
|
||||||
|
chapters.append({
|
||||||
|
'start_time': start_time,
|
||||||
|
'end_time': start_time + chapter_duration,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'formats': formats,
|
||||||
|
'chapters': chapters,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
|
@ -1,13 +1,12 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
|
||||||
determine_ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -78,42 +77,25 @@ class TV4IE(InfoExtractor):
|
|||||||
|
|
||||||
title = info['title']
|
title = info['title']
|
||||||
|
|
||||||
subtitles = {}
|
manifest_url = self._download_json(
|
||||||
formats = []
|
'https://playback-api.b17g.net/media/' + video_id,
|
||||||
# http formats are linked with unresolvable host
|
video_id, query={
|
||||||
for kind in ('hls3', ''):
|
'service': 'tv4',
|
||||||
data = self._download_json(
|
'device': 'browser',
|
||||||
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
|
'protocol': 'hls',
|
||||||
video_id, 'Downloading sources JSON', query={
|
})['playbackItem']['manifestUrl']
|
||||||
'protocol': kind,
|
formats = self._extract_m3u8_formats(
|
||||||
'videoFormat': 'MP4+WEBVTT',
|
manifest_url, video_id, 'mp4',
|
||||||
})
|
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||||
items = try_get(data, lambda x: x['playback']['items']['item'])
|
formats.extend(self._extract_mpd_formats(
|
||||||
if not items:
|
manifest_url.replace('.m3u8', '.mpd'),
|
||||||
continue
|
video_id, mpd_id='dash', fatal=False))
|
||||||
if isinstance(items, dict):
|
formats.extend(self._extract_f4m_formats(
|
||||||
items = [items]
|
manifest_url.replace('.m3u8', '.f4m'),
|
||||||
for item in items:
|
video_id, f4m_id='hds', fatal=False))
|
||||||
manifest_url = item.get('url')
|
formats.extend(self._extract_ism_formats(
|
||||||
if not isinstance(manifest_url, compat_str):
|
re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
|
||||||
continue
|
video_id, ism_id='mss', fatal=False))
|
||||||
ext = determine_ext(manifest_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id=kind, fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_akamai_formats(
|
|
||||||
manifest_url, video_id, {
|
|
||||||
'hls': 'tv4play-i.akamaihd.net',
|
|
||||||
}))
|
|
||||||
elif ext == 'webvtt':
|
|
||||||
subtitles = self._merge_subtitles(
|
|
||||||
subtitles, {
|
|
||||||
'sv': [{
|
|
||||||
'url': manifest_url,
|
|
||||||
'ext': 'vtt',
|
|
||||||
}]})
|
|
||||||
|
|
||||||
if not formats and info.get('is_geo_restricted'):
|
if not formats and info.get('is_geo_restricted'):
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
@ -124,7 +106,7 @@ class TV4IE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
# 'subtitles': subtitles,
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
|
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
|
148
youtube_dl/extractor/tvnet.py
Normal file
148
youtube_dl/extractor/tvnet.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TVNetIE(InfoExtractor):
    """Extractor for video, radio and live channel pages on *.tvnet.gov.vn.

    The numeric id is the last all-digit path component matched by
    ``_VALID_URL``.  The page exposes a ``data-file`` attribute whose URL is
    fetched as JSON and iterated for stream entries carrying a ``url`` key;
    each such URL is handed to the HLS (m3u8) format extractor.
    """

    _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
    _TESTS = [{
        # video
        'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
        'md5': 'b4d7abe0252c9b47774760b7519c7558',
        'info_dict': {
            'id': '109788',
            'ext': 'mp4',
            'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
            'view_count': int,
        },
    }, {
        # audio
        'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
        'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
        'info_dict': {
            'id': '27017',
            'ext': 'm4a',
            'title': 'VOV1 - Bản tin chiều (10/06/2018)',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
        },
    }, {
        'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
        'info_dict': {
            'id': '129999',
            'ext': 'mp4',
            'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': False,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
        'info_dict': {
            'id': '1011',
            'ext': 'mp4',
            'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # radio live stream
        'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
        'info_dict': {
            'id': '1014',
            'ext': 'm4a',
            'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the tvnet.gov.vn page at ``url``.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``is_live``,
        ``view_count`` and ``formats``.  ``is_live`` is ``None`` when the URL
        section is neither an on-demand (/video/, /radio/) nor a channel
        (/kenh-truyen-hinh/) page.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Title sources are tried in order; only the last one is fatal.
        title = (
            self._og_search_title(webpage, default=None) or
            self._html_search_meta('title', webpage, default=None) or
            self._search_regex(r'<title>([^<]+)<', webpage, 'title'))
        # Drop the trailing site suffix ("... - TV Net").
        title = re.sub(r'\s*-\s*TV Net\s*$', '', title)

        # Liveness is inferred from the URL path section alone.
        if '/video/' in url or '/radio/' in url:
            is_live = False
        elif '/kenh-truyen-hinh/' in url:
            is_live = True
        else:
            is_live = None

        data_file = unescapeHTML(self._search_regex(
            r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
            'data file', group='url'))

        seen_urls = set()
        formats = []
        for stream in self._download_json(data_file, video_id):
            if not isinstance(stream, dict):
                continue
            stream_url = stream.get('url')
            # Validate the type *before* the set lookup: an unhashable value
            # (e.g. a list or dict) would raise TypeError on ``in``.
            if not stream_url or not isinstance(stream_url, compat_str):
                continue
            # The same stream URL may be listed more than once; extract once.
            if stream_url in seen_urls:
                continue
            seen_urls.add(stream_url)
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, 'mp4',
                entry_protocol='m3u8' if is_live else 'm3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        # better support for radio streams: titles starting with "VOV" are
        # treated as audio-only, so every format is flagged accordingly.
        if title.startswith('VOV'):
            for f in formats:
                f.update({
                    'ext': 'm4a',
                    'vcodec': 'none',
                })

        # Prefer og:image; otherwise fall back to the data-image attribute
        # (the regex search is non-fatal and may yield None).
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or unescapeHTML(
            self._search_regex(
                r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
                'thumbnail', default=None, group='url'))

        # Must run after the 'VOV' check above, which inspects the raw title.
        if is_live:
            title = self._live_title(title)

        view_count = int_or_none(self._search_regex(
            r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
            webpage, 'view count', default=None))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'is_live': is_live,
            'view_count': view_count,
            'formats': formats,
        }
|
@ -63,7 +63,7 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
'id': '623160978427936768',
|
'id': '623160978427936768',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Twitter web player',
|
'title': 'Twitter web player',
|
||||||
'thumbnail': r're:^https?://.*(?:\bformat=|\.)jpg',
|
'thumbnail': r're:^https?://.*$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -108,6 +108,8 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_API_BASE = 'https://api.twitter.com/1.1'
|
||||||
|
|
||||||
def _parse_media_info(self, media_info, video_id):
|
def _parse_media_info(self, media_info, video_id):
|
||||||
formats = []
|
formats = []
|
||||||
for media_variant in media_info.get('variants', []):
|
for media_variant in media_info.get('variants', []):
|
||||||
@ -149,7 +151,7 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
main_script, 'bearer token')
|
main_script, 'bearer token')
|
||||||
# https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
|
# https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'https://api.twitter.com/1.1/statuses/show/%s.json' % video_id,
|
'%s/statuses/show/%s.json' % (self._API_BASE, video_id),
|
||||||
video_id, 'Downloading API data',
|
video_id, 'Downloading API data',
|
||||||
headers={
|
headers={
|
||||||
'Authorization': 'Bearer ' + bearer_token,
|
'Authorization': 'Bearer ' + bearer_token,
|
||||||
@ -223,15 +225,49 @@ class TwitterCardIE(TwitterBaseIE):
|
|||||||
formats.extend(self._extract_mobile_formats(username, video_id))
|
formats.extend(self._extract_mobile_formats(username, video_id))
|
||||||
|
|
||||||
if formats:
|
if formats:
|
||||||
|
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
|
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
||||||
|
duration = float_or_none(config.get('duration'), scale=1000) or duration
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
headers = {
|
||||||
|
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
|
||||||
|
'Referer': url,
|
||||||
|
}
|
||||||
|
ct0 = self._get_cookies(url).get('ct0')
|
||||||
|
if ct0:
|
||||||
|
headers['csrf_token'] = ct0.value
|
||||||
|
guest_token = self._download_json(
|
||||||
|
'%s/guest/activate.json' % self._API_BASE, video_id,
|
||||||
|
'Downloading guest token', data=b'',
|
||||||
|
headers=headers)['guest_token']
|
||||||
|
headers['x-guest-token'] = guest_token
|
||||||
|
self._set_cookie('api.twitter.com', 'gt', guest_token)
|
||||||
|
config = self._download_json(
|
||||||
|
'%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
|
||||||
|
video_id, headers=headers)
|
||||||
|
track = config['track']
|
||||||
|
vmap_url = track.get('vmapUrl')
|
||||||
|
if vmap_url:
|
||||||
|
formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
|
||||||
|
else:
|
||||||
|
playback_url = track['playbackUrl']
|
||||||
|
if determine_ext(playback_url) == 'm3u8':
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
playback_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
else:
|
||||||
|
formats = [{
|
||||||
|
'url': playback_url,
|
||||||
|
}]
|
||||||
|
title = 'Twitter web player'
|
||||||
|
thumbnail = config.get('posterImage')
|
||||||
|
duration = float_or_none(track.get('durationMs'), scale=1000)
|
||||||
|
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
|
||||||
thumbnail = config.get('posterImageUrl') or config.get('image_src')
|
|
||||||
duration = float_or_none(config.get('duration'), scale=1000) or duration
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -375,6 +411,22 @@ class TwitterIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# card via api.twitter.com/1.1/videos/tweet/config
|
||||||
|
'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1001551623938805763',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:.*?Shep is on a roll today.*?',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
|
||||||
|
'uploader': 'Lis Power',
|
||||||
|
'uploader_id': 'LisPower1',
|
||||||
|
'duration': 111.278,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires ffmpeg
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -36,7 +36,8 @@ class WimpIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
youtube_id = self._search_regex(
|
youtube_id = self._search_regex(
|
||||||
r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
|
(r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
|
||||||
|
r'data-id=["\']([0-9A-Za-z_-]{11})'),
|
||||||
webpage, 'video URL', default=None)
|
webpage, 'video URL', default=None)
|
||||||
if youtube_id:
|
if youtube_id:
|
||||||
return {
|
return {
|
||||||
|
@ -510,6 +510,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
'creator': 'Icona Pop',
|
'creator': 'Icona Pop',
|
||||||
|
'track': 'I Love It (feat. Charli XCX)',
|
||||||
|
'artist': 'Icona Pop',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -528,6 +530,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
'creator': 'Justin Timberlake',
|
'creator': 'Justin Timberlake',
|
||||||
|
'track': 'Tunnel Vision',
|
||||||
|
'artist': 'Justin Timberlake',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -597,7 +601,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'IB3lcPjvWLA',
|
'id': 'IB3lcPjvWLA',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
|
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
|
||||||
'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
|
'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
|
||||||
'duration': 244,
|
'duration': 244,
|
||||||
'uploader': 'AfrojackVEVO',
|
'uploader': 'AfrojackVEVO',
|
||||||
'uploader_id': 'AfrojackVEVO',
|
'uploader_id': 'AfrojackVEVO',
|
||||||
@ -638,7 +642,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 219,
|
'duration': 219,
|
||||||
'upload_date': '20100909',
|
'upload_date': '20100909',
|
||||||
'uploader': 'The Amazing Atheist',
|
'uploader': 'TJ Kirk',
|
||||||
'uploader_id': 'TheAmazingAtheist',
|
'uploader_id': 'TheAmazingAtheist',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
@ -668,10 +672,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
|
'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6kLq3WMV1nU',
|
'id': '6kLq3WMV1nU',
|
||||||
'ext': 'mp4',
|
'ext': 'webm',
|
||||||
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
|
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
|
||||||
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
|
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
|
||||||
'duration': 247,
|
'duration': 246,
|
||||||
'uploader': 'LloydVEVO',
|
'uploader': 'LloydVEVO',
|
||||||
'uploader_id': 'LloydVEVO',
|
'uploader_id': 'LloydVEVO',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
|
||||||
@ -733,7 +737,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_id': 'AllenMeow',
|
'uploader_id': 'AllenMeow',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
|
||||||
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
|
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
|
||||||
'uploader': '孫艾倫',
|
'uploader': '孫ᄋᄅ',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
|
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
|
||||||
},
|
},
|
||||||
@ -760,7 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
|
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'FIl7x6_3R5Y',
|
'id': 'FIl7x6_3R5Y',
|
||||||
'ext': 'mp4',
|
'ext': 'webm',
|
||||||
'title': 'md5:7b81415841e02ecd4313668cde88737a',
|
'title': 'md5:7b81415841e02ecd4313668cde88737a',
|
||||||
'description': 'md5:116377fd2963b81ec4ce64b542173306',
|
'description': 'md5:116377fd2963b81ec4ce64b542173306',
|
||||||
'duration': 220,
|
'duration': 220,
|
||||||
@ -769,8 +773,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
|
||||||
'uploader': 'dorappi2000',
|
'uploader': 'dorappi2000',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
'formats': 'mincount:32',
|
'formats': 'mincount:31',
|
||||||
},
|
},
|
||||||
|
'skip': 'not actual anymore',
|
||||||
},
|
},
|
||||||
# DASH manifest with segment_list
|
# DASH manifest with segment_list
|
||||||
{
|
{
|
||||||
@ -885,7 +890,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'lsguqyKfVQg',
|
'id': 'lsguqyKfVQg',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
|
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
|
||||||
'alt_title': 'Dark Walk',
|
'alt_title': 'Dark Walk - Position Music',
|
||||||
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
|
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
|
||||||
'duration': 133,
|
'duration': 133,
|
||||||
'upload_date': '20151119',
|
'upload_date': '20151119',
|
||||||
@ -893,7 +898,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
||||||
'uploader': 'IronSoulElf',
|
'uploader': 'IronSoulElf',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
|
'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
||||||
|
'track': 'Dark Walk - Position Music',
|
||||||
|
'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -950,7 +957,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'description': 'md5:dda0d780d5a6e120758d1711d062a867',
|
'description': 'md5:dda0d780d5a6e120758d1711d062a867',
|
||||||
'duration': 4060,
|
'duration': 4060,
|
||||||
'upload_date': '20151119',
|
'upload_date': '20151119',
|
||||||
'uploader': 'Bernie 2016',
|
'uploader': 'Bernie Sanders',
|
||||||
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
|
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
|
||||||
'license': 'Creative Commons Attribution license (reuse allowed)',
|
'license': 'Creative Commons Attribution license (reuse allowed)',
|
||||||
@ -985,6 +992,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'This video is not available.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# YouTube Red video with episode data
|
# YouTube Red video with episode data
|
||||||
@ -993,7 +1001,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'iqKdEhx-dD4',
|
'id': 'iqKdEhx-dD4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Isolation - Mind Field (Ep 1)',
|
'title': 'Isolation - Mind Field (Ep 1)',
|
||||||
'description': 'md5:8013b7ddea787342608f63a13ddc9492',
|
'description': 'md5:25b78d2f64ae81719f5c96319889b736',
|
||||||
'duration': 2085,
|
'duration': 2085,
|
||||||
'upload_date': '20170118',
|
'upload_date': '20170118',
|
||||||
'uploader': 'Vsauce',
|
'uploader': 'Vsauce',
|
||||||
@ -1026,7 +1034,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
|
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
|
||||||
'license': 'Standard YouTube License',
|
'license': 'Standard YouTube License',
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -1694,128 +1701,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||||
raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
|
raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
|
||||||
|
|
||||||
# Start extracting information
|
|
||||||
self.report_information_extraction(video_id)
|
|
||||||
|
|
||||||
# uploader
|
|
||||||
video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
|
|
||||||
if video_uploader:
|
|
||||||
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
|
|
||||||
else:
|
|
||||||
self._downloader.report_warning('unable to extract uploader name')
|
|
||||||
|
|
||||||
# uploader_id
|
|
||||||
video_uploader_id = None
|
|
||||||
video_uploader_url = None
|
|
||||||
mobj = re.search(
|
|
||||||
r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
|
|
||||||
video_webpage)
|
|
||||||
if mobj is not None:
|
|
||||||
video_uploader_id = mobj.group('uploader_id')
|
|
||||||
video_uploader_url = mobj.group('uploader_url')
|
|
||||||
else:
|
|
||||||
self._downloader.report_warning('unable to extract uploader nickname')
|
|
||||||
|
|
||||||
# thumbnail image
|
|
||||||
# We try first to get a high quality image:
|
|
||||||
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
|
||||||
video_webpage, re.DOTALL)
|
|
||||||
if m_thumb is not None:
|
|
||||||
video_thumbnail = m_thumb.group(1)
|
|
||||||
elif 'thumbnail_url' not in video_info:
|
|
||||||
self._downloader.report_warning('unable to extract video thumbnail')
|
|
||||||
video_thumbnail = None
|
|
||||||
else: # don't panic if we can't find it
|
|
||||||
video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
|
|
||||||
|
|
||||||
# upload date
|
|
||||||
upload_date = self._html_search_meta(
|
|
||||||
'datePublished', video_webpage, 'upload date', default=None)
|
|
||||||
if not upload_date:
|
|
||||||
upload_date = self._search_regex(
|
|
||||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
|
||||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
|
||||||
video_webpage, 'upload date', default=None)
|
|
||||||
upload_date = unified_strdate(upload_date)
|
|
||||||
|
|
||||||
video_license = self._html_search_regex(
|
|
||||||
r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
|
|
||||||
video_webpage, 'license', default=None)
|
|
||||||
|
|
||||||
m_music = re.search(
|
|
||||||
r'''(?x)
|
|
||||||
<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
|
|
||||||
<ul[^>]*>\s*
|
|
||||||
<li>(?P<title>.+?)
|
|
||||||
by (?P<creator>.+?)
|
|
||||||
(?:
|
|
||||||
\(.+?\)|
|
|
||||||
<a[^>]*
|
|
||||||
(?:
|
|
||||||
\bhref=["\']/red[^>]*>| # drop possible
|
|
||||||
>\s*Listen ad-free with YouTube Red # YouTube Red ad
|
|
||||||
)
|
|
||||||
.*?
|
|
||||||
)?</li
|
|
||||||
''',
|
|
||||||
video_webpage)
|
|
||||||
if m_music:
|
|
||||||
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
|
|
||||||
video_creator = clean_html(m_music.group('creator'))
|
|
||||||
else:
|
|
||||||
video_alt_title = video_creator = None
|
|
||||||
|
|
||||||
m_episode = re.search(
|
|
||||||
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
|
||||||
video_webpage)
|
|
||||||
if m_episode:
|
|
||||||
series = m_episode.group('series')
|
|
||||||
season_number = int(m_episode.group('season'))
|
|
||||||
episode_number = int(m_episode.group('episode'))
|
|
||||||
else:
|
|
||||||
series = season_number = episode_number = None
|
|
||||||
|
|
||||||
m_cat_container = self._search_regex(
|
|
||||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
|
||||||
video_webpage, 'categories', default=None)
|
|
||||||
if m_cat_container:
|
|
||||||
category = self._html_search_regex(
|
|
||||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
|
||||||
default=None)
|
|
||||||
video_categories = None if category is None else [category]
|
|
||||||
else:
|
|
||||||
video_categories = None
|
|
||||||
|
|
||||||
video_tags = [
|
|
||||||
unescapeHTML(m.group('content'))
|
|
||||||
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
|
||||||
|
|
||||||
def _extract_count(count_name):
|
|
||||||
return str_to_int(self._search_regex(
|
|
||||||
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
|
|
||||||
% re.escape(count_name),
|
|
||||||
video_webpage, count_name, default=None))
|
|
||||||
|
|
||||||
like_count = _extract_count('like')
|
|
||||||
dislike_count = _extract_count('dislike')
|
|
||||||
|
|
||||||
# subtitles
|
|
||||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
|
||||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
|
||||||
|
|
||||||
video_duration = try_get(
|
|
||||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
|
||||||
if not video_duration:
|
|
||||||
video_duration = parse_duration(self._html_search_meta(
|
|
||||||
'duration', video_webpage, 'video duration'))
|
|
||||||
|
|
||||||
# annotations
|
|
||||||
video_annotations = None
|
|
||||||
if self._downloader.params.get('writeannotations', False):
|
|
||||||
video_annotations = self._extract_annotations(video_id)
|
|
||||||
|
|
||||||
chapters = self._extract_chapters(description_original, video_duration)
|
|
||||||
|
|
||||||
def _extract_filesize(media_url):
|
def _extract_filesize(media_url):
|
||||||
return int_or_none(self._search_regex(
|
return int_or_none(self._search_regex(
|
||||||
r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
|
r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
|
||||||
@ -1990,6 +1875,133 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError(error_message, expected=True)
|
raise ExtractorError(error_message, expected=True)
|
||||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||||
|
|
||||||
|
# uploader
|
||||||
|
video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
|
||||||
|
if video_uploader:
|
||||||
|
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning('unable to extract uploader name')
|
||||||
|
|
||||||
|
# uploader_id
|
||||||
|
video_uploader_id = None
|
||||||
|
video_uploader_url = None
|
||||||
|
mobj = re.search(
|
||||||
|
r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
|
||||||
|
video_webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
video_uploader_id = mobj.group('uploader_id')
|
||||||
|
video_uploader_url = mobj.group('uploader_url')
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning('unable to extract uploader nickname')
|
||||||
|
|
||||||
|
# thumbnail image
|
||||||
|
# We try first to get a high quality image:
|
||||||
|
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||||
|
video_webpage, re.DOTALL)
|
||||||
|
if m_thumb is not None:
|
||||||
|
video_thumbnail = m_thumb.group(1)
|
||||||
|
elif 'thumbnail_url' not in video_info:
|
||||||
|
self._downloader.report_warning('unable to extract video thumbnail')
|
||||||
|
video_thumbnail = None
|
||||||
|
else: # don't panic if we can't find it
|
||||||
|
video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
|
||||||
|
|
||||||
|
# upload date
|
||||||
|
upload_date = self._html_search_meta(
|
||||||
|
'datePublished', video_webpage, 'upload date', default=None)
|
||||||
|
if not upload_date:
|
||||||
|
upload_date = self._search_regex(
|
||||||
|
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||||
|
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
||||||
|
video_webpage, 'upload date', default=None)
|
||||||
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
|
video_license = self._html_search_regex(
|
||||||
|
r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
|
||||||
|
video_webpage, 'license', default=None)
|
||||||
|
|
||||||
|
m_music = re.search(
|
||||||
|
r'''(?x)
|
||||||
|
<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
|
||||||
|
<ul[^>]*>\s*
|
||||||
|
<li>(?P<title>.+?)
|
||||||
|
by (?P<creator>.+?)
|
||||||
|
(?:
|
||||||
|
\(.+?\)|
|
||||||
|
<a[^>]*
|
||||||
|
(?:
|
||||||
|
\bhref=["\']/red[^>]*>| # drop possible
|
||||||
|
>\s*Listen ad-free with YouTube Red # YouTube Red ad
|
||||||
|
)
|
||||||
|
.*?
|
||||||
|
)?</li
|
||||||
|
''',
|
||||||
|
video_webpage)
|
||||||
|
if m_music:
|
||||||
|
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
|
||||||
|
video_creator = clean_html(m_music.group('creator'))
|
||||||
|
else:
|
||||||
|
video_alt_title = video_creator = None
|
||||||
|
|
||||||
|
def extract_meta(field):
|
||||||
|
return self._html_search_regex(
|
||||||
|
r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
|
||||||
|
video_webpage, field, default=None)
|
||||||
|
|
||||||
|
track = extract_meta('Song')
|
||||||
|
artist = extract_meta('Artist')
|
||||||
|
|
||||||
|
m_episode = re.search(
|
||||||
|
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
||||||
|
video_webpage)
|
||||||
|
if m_episode:
|
||||||
|
series = m_episode.group('series')
|
||||||
|
season_number = int(m_episode.group('season'))
|
||||||
|
episode_number = int(m_episode.group('episode'))
|
||||||
|
else:
|
||||||
|
series = season_number = episode_number = None
|
||||||
|
|
||||||
|
m_cat_container = self._search_regex(
|
||||||
|
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||||
|
video_webpage, 'categories', default=None)
|
||||||
|
if m_cat_container:
|
||||||
|
category = self._html_search_regex(
|
||||||
|
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||||
|
default=None)
|
||||||
|
video_categories = None if category is None else [category]
|
||||||
|
else:
|
||||||
|
video_categories = None
|
||||||
|
|
||||||
|
video_tags = [
|
||||||
|
unescapeHTML(m.group('content'))
|
||||||
|
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
||||||
|
|
||||||
|
def _extract_count(count_name):
|
||||||
|
return str_to_int(self._search_regex(
|
||||||
|
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
|
||||||
|
% re.escape(count_name),
|
||||||
|
video_webpage, count_name, default=None))
|
||||||
|
|
||||||
|
like_count = _extract_count('like')
|
||||||
|
dislike_count = _extract_count('dislike')
|
||||||
|
|
||||||
|
# subtitles
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||||
|
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||||
|
|
||||||
|
video_duration = try_get(
|
||||||
|
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||||
|
if not video_duration:
|
||||||
|
video_duration = parse_duration(self._html_search_meta(
|
||||||
|
'duration', video_webpage, 'video duration'))
|
||||||
|
|
||||||
|
# annotations
|
||||||
|
video_annotations = None
|
||||||
|
if self._downloader.params.get('writeannotations', False):
|
||||||
|
video_annotations = self._extract_annotations(video_id)
|
||||||
|
|
||||||
|
chapters = self._extract_chapters(description_original, video_duration)
|
||||||
|
|
||||||
# Look for the DASH manifest
|
# Look for the DASH manifest
|
||||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
dash_mpd_fatal = True
|
dash_mpd_fatal = True
|
||||||
@ -2055,9 +2067,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_url': video_uploader_url,
|
'uploader_url': video_uploader_url,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'license': video_license,
|
'license': video_license,
|
||||||
'creator': video_creator,
|
'creator': video_creator or artist,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'alt_title': video_alt_title,
|
'alt_title': video_alt_title or track,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'categories': video_categories,
|
'categories': video_categories,
|
||||||
@ -2080,6 +2092,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'series': series,
|
'series': series,
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
|
'track': track,
|
||||||
|
'artist': artist,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True):
|
|||||||
|
|
||||||
|
|
||||||
def determine_ext(url, default_ext='unknown_video'):
|
def determine_ext(url, default_ext='unknown_video'):
|
||||||
if url is None:
|
if url is None or '.' not in url:
|
||||||
return default_ext
|
return default_ext
|
||||||
guess = url.partition('?')[0].rpartition('.')[2]
|
guess = url.partition('?')[0].rpartition('.')[2]
|
||||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2018.05.26'
|
__version__ = '2018.06.14'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user