commit
139208da99
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.22*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.22**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.12.01
|
[debug] youtube-dl version 2016.12.22
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
1
AUTHORS
1
AUTHORS
@ -190,3 +190,4 @@ John Hawkinson
|
|||||||
Rich Leeper
|
Rich Leeper
|
||||||
Zhong Jianxin
|
Zhong Jianxin
|
||||||
Thor77
|
Thor77
|
||||||
|
Mattias Wadman
|
||||||
|
99
ChangeLog
99
ChangeLog
@ -1,8 +1,105 @@
|
|||||||
version <unreleased>
|
version 2016.12.22
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Improve detection of video-only formats in m3u8
|
||||||
|
manifests (#11507)
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
+ [theplatform] Pass geo verification headers to SMIL request (#10146)
|
||||||
|
+ [viu] Pass geo verification headers to auth request
|
||||||
|
* [rtl2] Extract more formats and metadata
|
||||||
|
* [vbox7] Skip malformed JSON-LD (#11501)
|
||||||
|
* [uplynk] Force downloading using native HLS downloader (#11496)
|
||||||
|
+ [laola1] Add support for another extraction scenario (#11460)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.12.20
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Improve fragment URL construction for DASH media
|
||||||
|
* [extractor/common] Fix codec information extraction for mixed audio/video
|
||||||
|
DASH media (#11490)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [vbox7] Fix extraction (#11494)
|
||||||
|
+ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027)
|
||||||
|
+ [piksel] Add support for player.piksel.com (#11246)
|
||||||
|
+ [vimeo] Add support for DASH formats
|
||||||
|
* [vimeo] Fix extraction for HLS formats (#11490)
|
||||||
|
* [kaltura] Fix wrong widget ID in some cases (#11480)
|
||||||
|
+ [nrktv:direkte] Add support for live streams (#11488)
|
||||||
|
* [pbs] Fix extraction for geo restricted videos (#7095)
|
||||||
|
* [brightcove:new] Skip widevine classic videos
|
||||||
|
+ [viu] Add support for viu.com (#10607, #11329)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.12.18
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Recognize DASH formats in html5 media entries
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [ccma] Add support for ccma.cat (#11359)
|
||||||
|
* [laola1tv] Improve extraction
|
||||||
|
+ [laola1tv] Add support for embed URLs (#11460)
|
||||||
|
* [nbc] Fix extraction for MSNBC videos (#11466)
|
||||||
|
* [twitch] Adapt to new videos pages URL schema (#11469)
|
||||||
|
+ [meipai] Add support for meipai.com (#10718)
|
||||||
|
* [jwplatform] Improve subtitles and duration extraction
|
||||||
|
+ [ondemandkorea] Add support for ondemandkorea.com (#10772)
|
||||||
|
+ [vvvvid] Add support for vvvvid.it (#5915)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.12.15
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add convenience urljoin
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [openload] Recognize oload.tv URLs (#10408)
|
||||||
|
+ [facebook] Recognize .onion URLs (#11443)
|
||||||
|
* [vlive] Fix extraction (#11375, #11383)
|
||||||
|
+ [canvas] Extract DASH formats
|
||||||
|
+ [melonvod] Add support for vod.melon.com (#11419)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.12.12
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add common user agents map
|
||||||
|
+ [common] Recognize HLS manifests that contain video only formats (#11394)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [dplay] Use Safari user agent for HLS (#11418)
|
||||||
|
+ [facebook] Detect login required error message
|
||||||
|
* [facebook] Improve video selection (#11390)
|
||||||
|
+ [canalplus] Add another video id pattern (#11399)
|
||||||
|
* [mixcloud] Relax URL regular expression (#11406)
|
||||||
|
* [ctvnews] Relax URL regular expression (#11394)
|
||||||
|
+ [rte] Capture and output error message (#7746, #10498)
|
||||||
|
+ [prosiebensat1] Add support for DASH formats
|
||||||
|
* [srgssr] Improve extraction for geo restricted videos (#11089)
|
||||||
|
* [rts] Improve extraction for geo restricted videos (#4989)
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.12.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [socks] Fix error reporting (#11355)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
* [pandoratv] Fix extraction (#11023)
|
||||||
|
+ [telebruxelles] Add support for emission URLs
|
||||||
|
* [telebruxelles] Extract all formats
|
||||||
|
+ [bloomberg] Add another video id regular expression (#11371)
|
||||||
|
* [fusion] Update ooyala id regular expression (#11364)
|
||||||
|
+ [1tv] Add support for playlists (#11335)
|
||||||
|
* [1tv] Improve extraction (#11335)
|
||||||
|
+ [aenetworks] Extract more formats (#11321)
|
||||||
+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
|
+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
|
||||||
|
|
||||||
|
|
||||||
version 2016.12.01
|
version 2016.12.01
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -638,7 +638,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
|||||||
- `acodec`: Name of the audio codec in use
|
- `acodec`: Name of the audio codec in use
|
||||||
- `vcodec`: Name of the video codec in use
|
- `vcodec`: Name of the video codec in use
|
||||||
- `container`: Name of the container format
|
- `container`: Name of the container format
|
||||||
- `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
|
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
|
||||||
- `format_id`: A short description of the format
|
- `format_id`: A short description of the format
|
||||||
|
|
||||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||||
@ -932,7 +932,7 @@ If you want to create a build of youtube-dl yourself, you'll need
|
|||||||
|
|
||||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||||
|
|
||||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||||
|
|
||||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||||
2. Check out the source code with:
|
2. Check out the source code with:
|
||||||
|
@ -131,6 +131,7 @@
|
|||||||
- **cbsnews**: CBS News
|
- **cbsnews**: CBS News
|
||||||
- **cbsnews:livevideo**: CBS News Live Videos
|
- **cbsnews:livevideo**: CBS News Live Videos
|
||||||
- **CBSSports**
|
- **CBSSports**
|
||||||
|
- **CCMA**
|
||||||
- **CCTV**
|
- **CCTV**
|
||||||
- **CDA**
|
- **CDA**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
@ -364,7 +365,8 @@
|
|||||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||||
- **kuwo:song**: 酷我音乐
|
- **kuwo:song**: 酷我音乐
|
||||||
- **la7.it**
|
- **la7.it**
|
||||||
- **Laola1Tv**
|
- **laola1tv**
|
||||||
|
- **laola1tv:embed**
|
||||||
- **LCI**
|
- **LCI**
|
||||||
- **Lcp**
|
- **Lcp**
|
||||||
- **LcpPlay**
|
- **LcpPlay**
|
||||||
@ -402,6 +404,8 @@
|
|||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
|
- **Meipai**: 美拍
|
||||||
|
- **MelonVOD**
|
||||||
- **META**
|
- **META**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
@ -513,6 +517,7 @@
|
|||||||
- **NRKPlaylist**
|
- **NRKPlaylist**
|
||||||
- **NRKSkole**: NRK Skole
|
- **NRKSkole**: NRK Skole
|
||||||
- **NRKTV**: NRK TV and NRK Radio
|
- **NRKTV**: NRK TV and NRK Radio
|
||||||
|
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||||
- **ntv.ru**
|
- **ntv.ru**
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
@ -523,6 +528,7 @@
|
|||||||
- **Odnoklassniki**
|
- **Odnoklassniki**
|
||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
|
- **OnDemandKorea**
|
||||||
- **onet.tv**
|
- **onet.tv**
|
||||||
- **onet.tv:channel**
|
- **onet.tv:channel**
|
||||||
- **OnionStudios**
|
- **OnionStudios**
|
||||||
@ -546,6 +552,7 @@
|
|||||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||||
- **phoenix.de**
|
- **phoenix.de**
|
||||||
- **Photobucket**
|
- **Photobucket**
|
||||||
|
- **Piksel**
|
||||||
- **Pinkbike**
|
- **Pinkbike**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
@ -784,10 +791,13 @@
|
|||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **twitch:chapter**
|
- **twitch:chapter**
|
||||||
- **twitch:clips**
|
- **twitch:clips**
|
||||||
- **twitch:past_broadcasts**
|
|
||||||
- **twitch:profile**
|
- **twitch:profile**
|
||||||
- **twitch:stream**
|
- **twitch:stream**
|
||||||
- **twitch:video**
|
- **twitch:video**
|
||||||
|
- **twitch:videos:all**
|
||||||
|
- **twitch:videos:highlights**
|
||||||
|
- **twitch:videos:past-broadcasts**
|
||||||
|
- **twitch:videos:uploads**
|
||||||
- **twitch:vod**
|
- **twitch:vod**
|
||||||
- **twitter**
|
- **twitter**
|
||||||
- **twitter:amplify**
|
- **twitter:amplify**
|
||||||
@ -795,6 +805,7 @@
|
|||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
- **UDNEmbed**: 聯合影音
|
- **UDNEmbed**: 聯合影音
|
||||||
|
- **UKTVPlay**
|
||||||
- **Unistra**
|
- **Unistra**
|
||||||
- **uol.com.br**
|
- **uol.com.br**
|
||||||
- **uplynk**
|
- **uplynk**
|
||||||
@ -859,6 +870,9 @@
|
|||||||
- **Vimple**: Vimple - one-click video hosting
|
- **Vimple**: Vimple - one-click video hosting
|
||||||
- **Vine**
|
- **Vine**
|
||||||
- **vine:user**
|
- **vine:user**
|
||||||
|
- **Viu**
|
||||||
|
- **viu:ott**
|
||||||
|
- **viu:playlist**
|
||||||
- **Vivo**: vivo.sx
|
- **Vivo**: vivo.sx
|
||||||
- **vk**: VK
|
- **vk**: VK
|
||||||
- **vk:uservideos**: VK - User's Videos
|
- **vk:uservideos**: VK - User's Videos
|
||||||
@ -873,6 +887,7 @@
|
|||||||
- **VRT**
|
- **VRT**
|
||||||
- **vube**: Vube.com
|
- **vube**: Vube.com
|
||||||
- **VuClip**
|
- **VuClip**
|
||||||
|
- **VVVVID**
|
||||||
- **VyboryMos**
|
- **VyboryMos**
|
||||||
- **Vzaar**
|
- **Vzaar**
|
||||||
- **Walla**
|
- **Walla**
|
||||||
|
@ -70,6 +70,7 @@ from youtube_dl.utils import (
|
|||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
url_basename,
|
url_basename,
|
||||||
base_url,
|
base_url,
|
||||||
|
urljoin,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urshift,
|
urshift,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
@ -445,6 +446,23 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
|
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
|
||||||
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
|
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
|
||||||
|
|
||||||
|
def test_urljoin(self):
    """Check urljoin() against a table of (base, path, expected) triples."""
    # Rows are evaluated in order; each one asserts
    # urljoin(base, path) == expected.
    cases = (
        # Absolute and relative paths against a base with/without a slash.
        ('http://foo.de/', '/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('//foo.de/', '/a/b/c.txt', '//foo.de/a/b/c.txt'),
        ('http://foo.de/', 'a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de', '/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de', 'a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        # A path that is already a full or protocol-relative URL.
        ('http://foo.de/', 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        ('http://foo.de/', '//foo.de/a/b/c.txt', '//foo.de/a/b/c.txt'),
        # Unusable base values combined with a full URL as path.
        (None, 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        (None, '//foo.de/a/b/c.txt', '//foo.de/a/b/c.txt'),
        ('', 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        (['foobar'], 'http://foo.de/a/b/c.txt', 'http://foo.de/a/b/c.txt'),
        # Unusable path values yield None.
        ('http://foo.de/', None, None),
        ('http://foo.de/', '', None),
        ('http://foo.de/', ['foobar'], None),
        # Dot segments are resolved against the base path.
        ('http://foo.de/a/b/c.txt', '.././../d.txt', 'http://foo.de/d.txt'),
    )
    for base, path, expected in cases:
        self.assertEqual(urljoin(base, path), expected)
|
||||||
|
|
||||||
def test_parse_age_limit(self):
|
def test_parse_age_limit(self):
|
||||||
self.assertEqual(parse_age_limit(None), None)
|
self.assertEqual(parse_age_limit(None), None)
|
||||||
self.assertEqual(parse_age_limit(False), None)
|
self.assertEqual(parse_age_limit(False), None)
|
||||||
|
@ -65,6 +65,9 @@ class HlsFD(FragmentFD):
|
|||||||
s = manifest.decode('utf-8', 'ignore')
|
s = manifest.decode('utf-8', 'ignore')
|
||||||
|
|
||||||
if not self.can_download(s, info_dict):
|
if not self.can_download(s, info_dict):
|
||||||
|
if info_dict.get('extra_param_to_segment_url'):
|
||||||
|
self.report_error('pycrypto not found. Please install it.')
|
||||||
|
return False
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'hlsnative has detected features it does not support, '
|
'hlsnative has detected features it does not support, '
|
||||||
'extraction will be delegated to ffmpeg')
|
'extraction will be delegated to ffmpeg')
|
||||||
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -15,18 +16,33 @@ from ..utils import (
|
|||||||
class ACastIE(InfoExtractor):
|
class ACastIE(InfoExtractor):
|
||||||
IE_NAME = 'acast'
|
IE_NAME = 'acast'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# test with one bling
|
||||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
||||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||||
'timestamp': 1196172000000,
|
'timestamp': 1196172000,
|
||||||
|
'upload_date': '20071127',
|
||||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||||
'duration': 211,
|
'duration': 211,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# test with multiple blings
|
||||||
|
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||||
|
'md5': '55c0097badd7095f494c99a172f86501',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||||
|
'timestamp': 1477346700,
|
||||||
|
'upload_date': '20161024',
|
||||||
|
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||||
|
'duration': 2797,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
@ -35,11 +51,11 @@ class ACastIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': compat_str(cast_data['id']),
|
'id': compat_str(cast_data['id']),
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': cast_data['blings'][0]['audio'],
|
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
|
||||||
'title': cast_data['name'],
|
'title': cast_data['name'],
|
||||||
'description': cast_data.get('description'),
|
'description': cast_data.get('description'),
|
||||||
'thumbnail': cast_data.get('image'),
|
'thumbnail': cast_data.get('image'),
|
||||||
'timestamp': int_or_none(cast_data.get('publishingDate')),
|
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
|
||||||
'duration': int_or_none(cast_data.get('duration')),
|
'duration': int_or_none(cast_data.get('duration')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
"""Return a list of all Brightcove URLs from the webpage """
|
"""Return a list of all Brightcove URLs from the webpage """
|
||||||
|
|
||||||
url_m = re.search(
|
url_m = re.search(
|
||||||
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
|
r'''(?x)
|
||||||
webpage)
|
<meta\s+
|
||||||
|
(?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
|
||||||
|
content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
|
||||||
|
''', webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
url = unescapeHTML(url_m.group(1))
|
url = unescapeHTML(url_m.group('url'))
|
||||||
# Some sites don't add it, we can't download with this url, for example:
|
# Some sites don't add it, we can't download with this url, for example:
|
||||||
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
||||||
if 'playerKey' in url or 'videoId' in url:
|
if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
|
||||||
return [url]
|
return [url]
|
||||||
|
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
# Change the 'videoId' and others field to '@videoPlayer'
|
# Change the 'videoId' and others field to '@videoPlayer'
|
||||||
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
|
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
|
||||||
# Change bckey (used by bcove.me urls) to playerKey
|
# Change bckey (used by bcove.me urls) to playerKey
|
||||||
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -548,7 +551,7 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
container = source.get('container')
|
container = source.get('container')
|
||||||
ext = mimetype2ext(source.get('type'))
|
ext = mimetype2ext(source.get('type'))
|
||||||
src = source.get('src')
|
src = source.get('src')
|
||||||
if ext == 'ism':
|
if ext == 'ism' or container == 'WVM':
|
||||||
continue
|
continue
|
||||||
elif ext == 'm3u8' or container == 'M2TS':
|
elif ext == 'm3u8' or container == 'M2TS':
|
||||||
if not src:
|
if not src:
|
||||||
|
@ -105,7 +105,8 @@ class CanalplusIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||||
r'id=["\']canal_video_player(?P<id>\d+)'],
|
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||||
|
r'data-video=["\'](?P<id>\d+)'],
|
||||||
webpage, 'video id', group='id')
|
webpage, 'video id', group='id')
|
||||||
|
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||||
|
@ -89,6 +89,9 @@ class CanvasIE(InfoExtractor):
|
|||||||
elif format_type == 'HDS':
|
elif format_type == 'HDS':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
format_url, display_id, f4m_id=format_type, fatal=False))
|
format_url, display_id, f4m_id=format_type, fatal=False))
|
||||||
|
elif format_type == 'MPEG_DASH':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, display_id, mpd_id=format_type, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_type,
|
'format_id': format_type,
|
||||||
|
@ -283,11 +283,6 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
|
|||||||
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
||||||
if len(formats) < 2:
|
if len(formats) < 2:
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||||
# Despite metadata in m3u8 all video+audio formats are
|
|
||||||
# actually video-only (no audio)
|
|
||||||
for f in formats:
|
|
||||||
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
|
|
||||||
f['acodec'] = 'none'
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
|
99
youtube_dl/extractor/ccma.py
Normal file
99
youtube_dl/extractor/ccma.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CCMAIE(InfoExtractor):
    """Extractor for ccma.cat video and audio pages."""

    # "type" captures whether the page is a video or an audio item and
    # "id" captures its numeric identifier; both are sent to the media API.
    _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
        'md5': '7296ca43977c8ea4469e719c609b0871',
        'info_dict': {
            'id': '5630208',
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
            'timestamp': 1470918540,
            'upload_date': '20160811',
        }
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
        'md5': 'fa3e38f269329a278271276330261425',
        'info_dict': {
            'id': '943685',
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
            'upload_date': '20171205',
            'timestamp': 1512507300,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a ccma.cat media page.

        Queries the media.jsp endpoint once per profile, collects one format
        per profile that returns a media URL, and takes the metadata from the
        last profile response that was non-empty.

        Raises KeyError if no response provided 'informacio' or its 'titol'.
        """
        media_type, media_id = re.match(self._VALID_URL, url).groups()

        media_data = {}
        formats = []
        # Audio items are requested with the 'pc' profile only; videos are
        # requested with 'mobil' first, then 'pc'.
        profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
        for i, profile in enumerate(profiles):
            # fatal=False is passed so that, presumably, a failed request for
            # one profile does not abort the whole extraction.
            md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
                'media': media_type,
                'idint': media_id,
                'profile': profile,
            }, fatal=False)
            if md:
                media_data = md
                media_url = media_data.get('media', {}).get('url')
                if media_url:
                    formats.append({
                        'format_id': profile,
                        'url': media_url,
                        # The position in the profile list is used as quality.
                        'quality': i,
                    })
        self._sort_formats(formats)

        informacio = media_data['informacio']
        title = informacio['titol']
        durada = informacio.get('durada', {})
        # 'milisegons' is scaled by 1000; fall back to the textual duration.
        duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))

        subtitles = {}
        subtitols = media_data.get('subtitols', {})
        if subtitols:
            sub_url = subtitols.get('url')
            if sub_url:
                # Key by ISO code, then display text, defaulting to 'ca'.
                subtitles.setdefault(
                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
                        'url': sub_url,
                    })

        thumbnails = []
        imatges = media_data.get('imatges', {})
        if imatges:
            thumbnail_url = imatges.get('url')
            if thumbnail_url:
                thumbnails = [{
                    'url': thumbnail_url,
                    'width': int_or_none(imatges.get('amplada')),
                    'height': int_or_none(imatges.get('alcada')),
                }]

        return {
            'id': media_id,
            'title': title,
            'description': clean_html(informacio.get('descripcio')),
            'duration': duration,
            'timestamp': timestamp,
            # Fixed key: this was misspelled 'thumnails', so the list built
            # above never reached the 'thumbnails' field.
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }
|
@ -59,6 +59,7 @@ from ..utils import (
|
|||||||
parse_m3u8_attributes,
|
parse_m3u8_attributes,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -188,9 +189,10 @@ class InfoExtractor(object):
|
|||||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||||
location: Physical location where the video was filmed.
|
location: Physical location where the video was filmed.
|
||||||
subtitles: The available subtitles as a dictionary in the format
|
subtitles: The available subtitles as a dictionary in the format
|
||||||
{language: subformats}. "subformats" is a list sorted from
|
{tag: subformats}. "tag" is usually a language code, and
|
||||||
lower to higher preference, each element is a dictionary
|
"subformats" is a list sorted from lower to higher
|
||||||
with the "ext" entry and one of:
|
preference, each element is a dictionary with the "ext"
|
||||||
|
entry and one of:
|
||||||
* "data": The subtitles file contents
|
* "data": The subtitles file contents
|
||||||
* "url": A URL pointing to the subtitles file
|
* "url": A URL pointing to the subtitles file
|
||||||
"ext" will be calculated from URL if missing
|
"ext" will be calculated from URL if missing
|
||||||
@ -1224,6 +1226,7 @@ class InfoExtractor(object):
|
|||||||
'protocol': entry_protocol,
|
'protocol': entry_protocol,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
}]
|
}]
|
||||||
|
audio_in_video_stream = {}
|
||||||
last_info = {}
|
last_info = {}
|
||||||
last_media = {}
|
last_media = {}
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
@ -1233,25 +1236,32 @@ class InfoExtractor(object):
|
|||||||
media = parse_m3u8_attributes(line)
|
media = parse_m3u8_attributes(line)
|
||||||
media_type = media.get('TYPE')
|
media_type = media.get('TYPE')
|
||||||
if media_type in ('VIDEO', 'AUDIO'):
|
if media_type in ('VIDEO', 'AUDIO'):
|
||||||
|
group_id = media.get('GROUP-ID')
|
||||||
media_url = media.get('URI')
|
media_url = media.get('URI')
|
||||||
if media_url:
|
if media_url:
|
||||||
format_id = []
|
format_id = []
|
||||||
for v in (media.get('GROUP-ID'), media.get('NAME')):
|
for v in (group_id, media.get('NAME')):
|
||||||
if v:
|
if v:
|
||||||
format_id.append(v)
|
format_id.append(v)
|
||||||
formats.append({
|
f = {
|
||||||
'format_id': '-'.join(format_id),
|
'format_id': '-'.join(format_id),
|
||||||
'url': format_url(media_url),
|
'url': format_url(media_url),
|
||||||
'language': media.get('LANGUAGE'),
|
'language': media.get('LANGUAGE'),
|
||||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': entry_protocol,
|
'protocol': entry_protocol,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
})
|
}
|
||||||
|
if media_type == 'AUDIO':
|
||||||
|
f['vcodec'] = 'none'
|
||||||
|
if group_id and not audio_in_video_stream.get(group_id):
|
||||||
|
audio_in_video_stream[group_id] = False
|
||||||
|
formats.append(f)
|
||||||
else:
|
else:
|
||||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||||
# data be used by regular URI lines below
|
# data be used by regular URI lines below
|
||||||
last_media = media
|
last_media = media
|
||||||
|
if media_type == 'AUDIO' and group_id:
|
||||||
|
audio_in_video_stream[group_id] = True
|
||||||
elif line.startswith('#') or not line.strip():
|
elif line.startswith('#') or not line.strip():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@ -1295,6 +1305,9 @@ class InfoExtractor(object):
|
|||||||
'abr': abr,
|
'abr': abr,
|
||||||
})
|
})
|
||||||
f.update(parse_codecs(last_info.get('CODECS')))
|
f.update(parse_codecs(last_info.get('CODECS')))
|
||||||
|
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
||||||
|
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||||
|
f['acodec'] = 'none'
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
last_info = {}
|
last_info = {}
|
||||||
last_media = {}
|
last_media = {}
|
||||||
@ -1624,11 +1637,6 @@ class InfoExtractor(object):
|
|||||||
extract_Initialization(segment_template)
|
extract_Initialization(segment_template)
|
||||||
return ms_info
|
return ms_info
|
||||||
|
|
||||||
def combine_url(base_url, target_url):
|
|
||||||
if re.match(r'^https?://', target_url):
|
|
||||||
return target_url
|
|
||||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
|
||||||
|
|
||||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||||
formats = []
|
formats = []
|
||||||
for period in mpd_doc.findall(_add_ns('Period')):
|
for period in mpd_doc.findall(_add_ns('Period')):
|
||||||
@ -1678,12 +1686,11 @@ class InfoExtractor(object):
|
|||||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||||
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
|
|
||||||
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
|
||||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||||
'format_note': 'DASH %s' % content_type,
|
'format_note': 'DASH %s' % content_type,
|
||||||
'filesize': filesize,
|
'filesize': filesize,
|
||||||
}
|
}
|
||||||
|
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||||
|
|
||||||
@ -1767,7 +1774,7 @@ class InfoExtractor(object):
|
|||||||
f['fragments'].append({'url': initialization_url})
|
f['fragments'].append({'url': initialization_url})
|
||||||
f['fragments'].extend(representation_ms_info['fragments'])
|
f['fragments'].extend(representation_ms_info['fragments'])
|
||||||
for fragment in f['fragments']:
|
for fragment in f['fragments']:
|
||||||
fragment['url'] = combine_url(base_url, fragment['url'])
|
fragment['url'] = urljoin(base_url, fragment['url'])
|
||||||
try:
|
try:
|
||||||
existing_format = next(
|
existing_format = next(
|
||||||
fo for fo in formats
|
fo for fo in formats
|
||||||
@ -1881,7 +1888,7 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
|
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
|
||||||
def absolute_url(video_url):
|
def absolute_url(video_url):
|
||||||
return compat_urlparse.urljoin(base_url, video_url)
|
return compat_urlparse.urljoin(base_url, video_url)
|
||||||
|
|
||||||
@ -1898,11 +1905,16 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _media_formats(src, cur_media_type):
|
def _media_formats(src, cur_media_type):
|
||||||
full_url = absolute_url(src)
|
full_url = absolute_url(src)
|
||||||
if determine_ext(full_url) == 'm3u8':
|
ext = determine_ext(full_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
is_plain_url = False
|
is_plain_url = False
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
full_url, video_id, ext='mp4',
|
full_url, video_id, ext='mp4',
|
||||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
||||||
|
elif ext == 'mpd':
|
||||||
|
is_plain_url = False
|
||||||
|
formats = self._extract_mpd_formats(
|
||||||
|
full_url, video_id, mpd_id=mpd_id)
|
||||||
else:
|
else:
|
||||||
is_plain_url = True
|
is_plain_url = True
|
||||||
formats = [{
|
formats = [{
|
||||||
|
@ -8,7 +8,7 @@ from ..utils import orderedSet
|
|||||||
|
|
||||||
|
|
||||||
class CTVNewsIE(InfoExtractor):
|
class CTVNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||||
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
||||||
@ -40,6 +40,9 @@ class CTVNewsIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
|
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -8,6 +8,7 @@ import time
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
USER_AGENTS,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
@ -102,10 +103,16 @@ class DPlayIE(InfoExtractor):
|
|||||||
manifest_url, video_id, ext='mp4',
|
manifest_url, video_id, ext='mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
||||||
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
||||||
# ourselves
|
# ourselves. Also fragments' URLs are only served signed for
|
||||||
|
# Safari user agent.
|
||||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
||||||
for m3u8_format in m3u8_formats:
|
for m3u8_format in m3u8_formats:
|
||||||
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
|
m3u8_format.update({
|
||||||
|
'url': update_url_query(m3u8_format['url'], query),
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': USER_AGENTS['Safari'],
|
||||||
|
},
|
||||||
|
})
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
elif protocol == 'hds':
|
elif protocol == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
@ -150,6 +150,7 @@ from .cbsnews import (
|
|||||||
)
|
)
|
||||||
from .cbssports import CBSSportsIE
|
from .cbssports import CBSSportsIE
|
||||||
from .ccc import CCCIE
|
from .ccc import CCCIE
|
||||||
|
from .ccma import CCMAIE
|
||||||
from .cctv import CCTVIE
|
from .cctv import CCTVIE
|
||||||
from .cda import CDAIE
|
from .cda import CDAIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
@ -446,7 +447,10 @@ from .kuwo import (
|
|||||||
KuwoMvIE,
|
KuwoMvIE,
|
||||||
)
|
)
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .laola1tv import Laola1TvIE
|
from .laola1tv import (
|
||||||
|
Laola1TvEmbedIE,
|
||||||
|
Laola1TvIE,
|
||||||
|
)
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
from .lcp import (
|
from .lcp import (
|
||||||
LcpPlayIE,
|
LcpPlayIE,
|
||||||
@ -498,6 +502,8 @@ from .mangomolo import (
|
|||||||
)
|
)
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
|
from .meipai import MeipaiIE
|
||||||
|
from .melonvod import MelonVODIE
|
||||||
from .meta import METAIE
|
from .meta import METAIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
@ -649,6 +655,7 @@ from .nrk import (
|
|||||||
NRKPlaylistIE,
|
NRKPlaylistIE,
|
||||||
NRKSkoleIE,
|
NRKSkoleIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
|
NRKTVDirekteIE,
|
||||||
)
|
)
|
||||||
from .ntvde import NTVDeIE
|
from .ntvde import NTVDeIE
|
||||||
from .ntvru import NTVRuIE
|
from .ntvru import NTVRuIE
|
||||||
@ -661,6 +668,7 @@ from .nzz import NZZIE
|
|||||||
from .odatv import OdaTVIE
|
from .odatv import OdaTVIE
|
||||||
from .odnoklassniki import OdnoklassnikiIE
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
|
from .ondemandkorea import OnDemandKoreaIE
|
||||||
from .onet import (
|
from .onet import (
|
||||||
OnetIE,
|
OnetIE,
|
||||||
OnetChannelIE,
|
OnetChannelIE,
|
||||||
@ -691,6 +699,7 @@ from .periscope import (
|
|||||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .piksel import PikselIE
|
||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
@ -998,7 +1007,10 @@ from .twitch import (
|
|||||||
TwitchChapterIE,
|
TwitchChapterIE,
|
||||||
TwitchVodIE,
|
TwitchVodIE,
|
||||||
TwitchProfileIE,
|
TwitchProfileIE,
|
||||||
|
TwitchAllVideosIE,
|
||||||
|
TwitchUploadsIE,
|
||||||
TwitchPastBroadcastsIE,
|
TwitchPastBroadcastsIE,
|
||||||
|
TwitchHighlightsIE,
|
||||||
TwitchStreamIE,
|
TwitchStreamIE,
|
||||||
TwitchClipsIE,
|
TwitchClipsIE,
|
||||||
)
|
)
|
||||||
@ -1012,6 +1024,7 @@ from .udemy import (
|
|||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
)
|
)
|
||||||
from .udn import UDNEmbedIE
|
from .udn import UDNEmbedIE
|
||||||
|
from .uktvplay import UKTVPlayIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .uol import UOLIE
|
from .uol import UOLIE
|
||||||
@ -1095,6 +1108,11 @@ from .viki import (
|
|||||||
VikiIE,
|
VikiIE,
|
||||||
VikiChannelIE,
|
VikiChannelIE,
|
||||||
)
|
)
|
||||||
|
from .viu import (
|
||||||
|
ViuIE,
|
||||||
|
ViuPlaylistIE,
|
||||||
|
ViuOTTIE,
|
||||||
|
)
|
||||||
from .vk import (
|
from .vk import (
|
||||||
VKIE,
|
VKIE,
|
||||||
VKUserVideosIE,
|
VKUserVideosIE,
|
||||||
@ -1109,6 +1127,7 @@ from .vporn import VpornIE
|
|||||||
from .vrt import VRTIE
|
from .vrt import VRTIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
|
from .vvvvid import VVVVIDIE
|
||||||
from .vyborymos import VyboryMosIE
|
from .vyborymos import VyboryMosIE
|
||||||
from .vzaar import VzaarIE
|
from .vzaar import VzaarIE
|
||||||
from .walla import WallaIE
|
from .walla import WallaIE
|
||||||
|
@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
https?://
|
https?://
|
||||||
(?:[\w-]+\.)?facebook\.com/
|
(?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
|
||||||
(?:[^#]*?\#!/)?
|
(?:[^#]*?\#!/)?
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
@ -150,6 +150,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -244,8 +247,10 @@ class FacebookIE(InfoExtractor):
|
|||||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||||
for item in server_js_data.get('instances', []):
|
for item in server_js_data.get('instances', []):
|
||||||
if item[1][0] == 'VideoConfig':
|
if item[1][0] == 'VideoConfig':
|
||||||
video_data = item[2][0]['videoData']
|
video_item = item[2][0]
|
||||||
break
|
if video_item.get('video_id') == video_id:
|
||||||
|
video_data = video_item['videoData']
|
||||||
|
break
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
if not fatal_if_no_video:
|
||||||
@ -255,6 +260,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||||
expected=True)
|
expected=True)
|
||||||
|
elif '>You must log in to continue' in webpage:
|
||||||
|
self.raise_login_required()
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
|
|
||||||
|
@ -75,6 +75,7 @@ from .facebook import FacebookIE
|
|||||||
from .soundcloud import SoundcloudIE
|
from .soundcloud import SoundcloudIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
|
from .piksel import PikselIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -343,10 +344,10 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||||
},
|
},
|
||||||
# embedded brightcove video
|
|
||||||
# it also tests brightcove videos that need to set the 'Referer' in the
|
|
||||||
# http requests
|
|
||||||
{
|
{
|
||||||
|
# embedded brightcove video
|
||||||
|
# it also tests brightcove videos that need to set the 'Referer'
|
||||||
|
# in the http requests
|
||||||
'add_ie': ['BrightcoveLegacy'],
|
'add_ie': ['BrightcoveLegacy'],
|
||||||
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
|
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -360,6 +361,24 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# embedded with itemprop embedURL and video id spelled as `idVideo`
|
||||||
|
'add_id': ['BrightcoveLegacy'],
|
||||||
|
'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5255628253001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:37c519b1128915607601e75a87995fc0',
|
||||||
|
'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
|
||||||
|
'uploader': 'BFM BUSINESS',
|
||||||
|
'uploader_id': '876450612001',
|
||||||
|
'timestamp': 1482255315,
|
||||||
|
'upload_date': '20161220',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# https://github.com/rg3/youtube-dl/issues/2253
|
# https://github.com/rg3/youtube-dl/issues/2253
|
||||||
'url': 'http://bcove.me/i6nfkrc3',
|
'url': 'http://bcove.me/i6nfkrc3',
|
||||||
@ -972,6 +991,20 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Kaltura embedded, some fileExt broken (#11480)
|
||||||
|
'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_sgtvehim',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Our "Standard Models" of particle physics and cosmology',
|
||||||
|
'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
|
||||||
|
'timestamp': 1321158993,
|
||||||
|
'upload_date': '20111113',
|
||||||
|
'uploader_id': 'kps1',
|
||||||
|
},
|
||||||
|
'add_ie': ['Kaltura'],
|
||||||
|
},
|
||||||
# Eagle.Platform embed (generic URL)
|
# Eagle.Platform embed (generic URL)
|
||||||
{
|
{
|
||||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||||
@ -2211,6 +2244,11 @@ class GenericIE(InfoExtractor):
|
|||||||
if arkena_url:
|
if arkena_url:
|
||||||
return self.url_result(arkena_url, ArkenaIE.ie_key())
|
return self.url_result(arkena_url, ArkenaIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Piksel embeds
|
||||||
|
piksel_url = PikselIE._extract_url(webpage)
|
||||||
|
if piksel_url:
|
||||||
|
return self.url_result(piksel_url, PikselIE.ie_key())
|
||||||
|
|
||||||
# Look for Limelight embeds
|
# Look for Limelight embeds
|
||||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor):
|
|||||||
tracks = video_data.get('tracks')
|
tracks = video_data.get('tracks')
|
||||||
if tracks and isinstance(tracks, list):
|
if tracks and isinstance(tracks, list):
|
||||||
for track in tracks:
|
for track in tracks:
|
||||||
if track.get('file') and track.get('kind') == 'captions':
|
if track.get('kind') != 'captions':
|
||||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
continue
|
||||||
'url': self._proto_relative_url(track['file'])
|
track_url = urljoin(base_url, track.get('file'))
|
||||||
})
|
if not track_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||||
|
'url': self._proto_relative_url(track_url)
|
||||||
|
})
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': this_video_id,
|
'id': this_video_id,
|
||||||
@ -121,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor):
|
|||||||
'description': video_data.get('description'),
|
'description': video_data.get('description'),
|
||||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||||
'duration': float_or_none(jwplayer_data.get('duration')),
|
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
|
@ -107,7 +107,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
||||||
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||||
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||||
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
|
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||||
""", webpage) or
|
""", webpage) or
|
||||||
re.search(
|
re.search(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
@ -266,6 +266,9 @@ class KalturaIE(InfoExtractor):
|
|||||||
# skip for now.
|
# skip for now.
|
||||||
if f.get('fileExt') == 'chun':
|
if f.get('fileExt') == 'chun':
|
||||||
continue
|
continue
|
||||||
|
if not f.get('fileExt') and f.get('containerFormat') == 'qt':
|
||||||
|
# QT indicates QuickTime; some videos have broken fileExt
|
||||||
|
f['fileExt'] = 'mov'
|
||||||
video_url = sign_url(
|
video_url = sign_url(
|
||||||
'%s/flavorId/%s' % (data_url, f['id']))
|
'%s/flavorId/%s' % (data_url, f['id']))
|
||||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||||
|
@ -1,25 +1,115 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
sanitized_Request,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
urljoin,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Laola1TvEmbedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'laola1tv:embed'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
# flashvars.premium = "false";
|
||||||
|
'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '708065',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'MA Long CHN - FAN Zhendong CHN',
|
||||||
|
'uploader': 'ITTF - International Table Tennis Federation',
|
||||||
|
'upload_date': '20161211',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
flash_vars = self._search_regex(
|
||||||
|
r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')
|
||||||
|
|
||||||
|
def get_flashvar(x, *args, **kwargs):
|
||||||
|
flash_var = self._search_regex(
|
||||||
|
r'%s\s*:\s*"([^"]+)"' % x,
|
||||||
|
flash_vars, x, default=None)
|
||||||
|
if not flash_var:
|
||||||
|
flash_var = self._search_regex([
|
||||||
|
r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
|
||||||
|
r'%s\s*=\s*"([^"]+)"' % x],
|
||||||
|
webpage, x, *args, **kwargs)
|
||||||
|
return flash_var
|
||||||
|
|
||||||
|
hd_doc = self._download_xml(
|
||||||
|
'http://www.laola1.tv/server/hd_video.php', video_id, query={
|
||||||
|
'play': get_flashvar('streamid'),
|
||||||
|
'partner': get_flashvar('partnerid'),
|
||||||
|
'portal': get_flashvar('portalid'),
|
||||||
|
'lang': get_flashvar('sprache'),
|
||||||
|
'v5ident': '',
|
||||||
|
})
|
||||||
|
|
||||||
|
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
|
||||||
|
title = _v('title', fatal=True)
|
||||||
|
|
||||||
|
token_url = None
|
||||||
|
premium = get_flashvar('premium', default=None)
|
||||||
|
if premium:
|
||||||
|
token_url = update_url_query(
|
||||||
|
_v('url', fatal=True), {
|
||||||
|
'timestamp': get_flashvar('timestamp'),
|
||||||
|
'auth': get_flashvar('auth'),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
data_abo = urlencode_postdata(
|
||||||
|
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
|
||||||
|
token_url = self._download_json(
|
||||||
|
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
|
||||||
|
video_id, query={
|
||||||
|
'videoId': _v('id'),
|
||||||
|
'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
|
||||||
|
'label': _v('label'),
|
||||||
|
'area': _v('area'),
|
||||||
|
}, data=data_abo)['data']['stream-access'][0]
|
||||||
|
|
||||||
|
token_doc = self._download_xml(
|
||||||
|
token_url, video_id, 'Downloading token',
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
|
||||||
|
token_attrib = xpath_element(token_doc, './/token').attrib
|
||||||
|
|
||||||
|
if token_attrib['status'] != '0':
|
||||||
|
raise ExtractorError(
|
||||||
|
'Token error: %s' % token_attrib['comment'], expected=True)
|
||||||
|
|
||||||
|
formats = self._extract_akamai_formats(
|
||||||
|
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
|
||||||
|
video_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
categories_str = _v('meta_sports')
|
||||||
|
categories = categories_str.split(',') if categories_str else []
|
||||||
|
is_live = _v('islive') == 'true'
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._live_title(title) if is_live else title,
|
||||||
|
'upload_date': unified_strdate(_v('time_date')),
|
||||||
|
'uploader': _v('meta_organisation'),
|
||||||
|
'categories': categories,
|
||||||
|
'is_live': is_live,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class Laola1TvIE(InfoExtractor):
|
class Laola1TvIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
|
IE_NAME = 'laola1tv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -67,85 +157,20 @@ class Laola1TvIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
display_id = mobj.group('slug')
|
|
||||||
kind = mobj.group('kind')
|
|
||||||
lang = mobj.group('lang')
|
|
||||||
portal = mobj.group('portal')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||||
|
|
||||||
iframe_url = self._search_regex(
|
iframe_url = urljoin(url, self._search_regex(
|
||||||
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
||||||
webpage, 'iframe url')
|
webpage, 'iframe url'))
|
||||||
|
|
||||||
video_id = self._search_regex(
|
|
||||||
r'videoid=(\d+)', iframe_url, 'video id')
|
|
||||||
|
|
||||||
iframe = self._download_webpage(compat_urlparse.urljoin(
|
|
||||||
url, iframe_url), display_id, 'Downloading iframe')
|
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
r'partnerid\s*:\s*(["\'])(?P<partner_id>.+?)\1',
|
|
||||||
iframe, 'partner id', group='partner_id')
|
|
||||||
|
|
||||||
hd_doc = self._download_xml(
|
|
||||||
'http://www.laola1.tv/server/hd_video.php?%s'
|
|
||||||
% compat_urllib_parse_urlencode({
|
|
||||||
'play': video_id,
|
|
||||||
'partner': partner_id,
|
|
||||||
'portal': portal,
|
|
||||||
'lang': lang,
|
|
||||||
'v5ident': '',
|
|
||||||
}), display_id)
|
|
||||||
|
|
||||||
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
|
|
||||||
title = _v('title', fatal=True)
|
|
||||||
|
|
||||||
VS_TARGETS = {
|
|
||||||
'video': '2',
|
|
||||||
'livestream': '17',
|
|
||||||
}
|
|
||||||
|
|
||||||
req = sanitized_Request(
|
|
||||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
|
|
||||||
compat_urllib_parse_urlencode({
|
|
||||||
'videoId': video_id,
|
|
||||||
'target': VS_TARGETS.get(kind, '2'),
|
|
||||||
'label': _v('label'),
|
|
||||||
'area': _v('area'),
|
|
||||||
}),
|
|
||||||
urlencode_postdata(
|
|
||||||
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))))
|
|
||||||
|
|
||||||
token_url = self._download_json(req, display_id)['data']['stream-access'][0]
|
|
||||||
token_doc = self._download_xml(token_url, display_id, 'Downloading token')
|
|
||||||
|
|
||||||
token_attrib = xpath_element(token_doc, './/token').attrib
|
|
||||||
token_auth = token_attrib['auth']
|
|
||||||
|
|
||||||
if token_auth in ('blocked', 'restricted', 'error'):
|
|
||||||
raise ExtractorError(
|
|
||||||
'Token error: %s' % token_attrib['comment'], expected=True)
|
|
||||||
|
|
||||||
formats = self._extract_f4m_formats(
|
|
||||||
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
|
|
||||||
video_id, f4m_id='hds')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
categories_str = _v('meta_sports')
|
|
||||||
categories = categories_str.split(',') if categories_str else []
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'_type': 'url',
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'url': iframe_url,
|
||||||
'upload_date': unified_strdate(_v('time_date')),
|
'ie_key': 'Laola1TvEmbed',
|
||||||
'uploader': _v('meta_organisation'),
|
|
||||||
'categories': categories,
|
|
||||||
'is_live': _v('islive') == 'true',
|
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
104
youtube_dl/extractor/meipai.py
Normal file
104
youtube_dl/extractor/meipai.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MeipaiIE(InfoExtractor):
    """Information extractor for media pages on meipai.com.

    Two page flavours are handled: recorded playbacks of live streams
    (an HLS playlist URL passed to ``encodeURIComponent`` in the page
    script) and regular uploaded videos (a direct URL stored in a
    ``data-video`` attribute).
    """

    IE_DESC = '美拍'
    # The dot of the host name is escaped so that it only matches a literal
    # '.' instead of any character.
    _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
    _TESTS = [{
        # regular uploaded video
        'url': 'http://www.meipai.com/media/531697625',
        'md5': 'e3e9600f9e55a302daecc90825854b4f',
        'info_dict': {
            'id': '531697625',
            'ext': 'mp4',
            'title': '#葉子##阿桑##余姿昀##超級女聲#',
            'description': '#葉子##阿桑##余姿昀##超級女聲#',
            # raw string: '\.' is not a valid escape in a normal literal
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 152,
            'timestamp': 1465492420,
            'upload_date': '20160609',
            'view_count': 35511,
            'creator': '她她-TATA',
            'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
        }
    }, {
        # record of live streaming
        'url': 'http://www.meipai.com/media/585526361',
        'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
        'info_dict': {
            'id': '585526361',
            'ext': 'mp4',
            'title': '姿昀和善願 練歌練琴啦😁😁😁',
            'description': '姿昀和善願 練歌練琴啦😁😁😁',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 5975,
            'timestamp': 1474311799,
            'upload_date': '20160919',
            'view_count': 1215,
            'creator': '她她-TATA',
        }
    }]

    def _real_extract(self, url):
        """Download the media page at *url* and build its info dict.

        The title comes from the Open Graph title, falling back to the
        HTML ``<title>`` element.  Formats are taken from the HLS playlist
        when one is present and yields formats, otherwise from the
        ``data-video`` attribute.  The remaining fields are read from
        Open Graph properties and HTML meta tags of the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'<title[^>]*>([^<]+)</title>', webpage, 'title')

        formats = []

        # recorded playback of live streaming
        m3u8_url = self._html_search_regex(
            r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
            webpage, 'm3u8 url', group='url', default=None)
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        if not formats:
            # regular uploaded video
            video_url = self._search_regex(
                r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
                group='url', default=None)
            if video_url:
                formats.append({
                    'url': video_url,
                    'format_id': 'http',
                })

        timestamp = unified_timestamp(self._og_search_property(
            'video:release_date', webpage, 'release date', fatal=False))

        # Splitting the '' default would produce [''], so empty entries are
        # dropped: a page without tag metadata yields an empty list.
        tags = [
            tag for tag in self._og_search_property(
                'video:tag', webpage, 'tags', default='').split(',')
            if tag]

        view_count = int_or_none(self._html_search_meta(
            'interactionCount', webpage, 'view count'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        creator = self._og_search_property(
            'video:director', webpage, 'creator', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'creator': creator,
            'tags': tags,
            'formats': formats,
        }
|
72
youtube_dl/extractor/melonvod.py
Normal file
72
youtube_dl/extractor/melonvod.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MelonVODIE(InfoExtractor):
    """Information extractor for vod.melon.com video detail pages."""

    _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
        'info_dict': {
            'id': '50158734',
            'ext': 'mp4',
            'title': "Jessica 'Wonderland' MV Making Film",
            # raw string: '\.' is not a valid escape in a normal literal
            'thumbnail': r're:^https?://.*\.jpg$',
            'artist': 'Jessica (제시카)',
            'upload_date': '20161212',
            'duration': 203,
        },
        'params': {
            'skip_download': 'm3u8 download',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Two JSON documents are downloaded: ``playerInfo.json`` (title and
        artist list) and ``streamingInfo.json`` (HLS playlist URL,
        thumbnail path, play time and service opening date).  A missing
        ``mvInfo``/``MVTITLE``, ``streamingInfo`` or ``encUrl`` entry
        raises ``KeyError``; every other field is optional.
        """
        video_id = self._match_id(url)

        play_info = self._download_json(
            'http://vod.melon.com/video/playerInfo.json', video_id,
            note='Downloading player info JSON', query={'mvId': video_id})

        title = play_info['mvInfo']['MVTITLE']

        info = self._download_json(
            'http://vod.melon.com/delivery/streamingInfo.json', video_id,
            note='Downloading streaming info JSON',
            query={
                'contsId': video_id,
                'contsType': 'VIDEO',
            })

        stream_info = info['streamingInfo']

        formats = self._extract_m3u8_formats(
            stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        artist_list = play_info.get('artistList')
        artist = None
        if isinstance(artist_list, list):
            # Skip malformed entries and collapse an empty result to None
            # so that no empty artist string is reported.
            artist = ', '.join(
                [a['ARTISTNAMEWEBLIST']
                 for a in artist_list
                 if isinstance(a, dict) and a.get('ARTISTNAMEWEBLIST')]) or None

        thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))

        duration = int_or_none(stream_info.get('playTime'))
        # The key may be present with a null value, which cannot be sliced;
        # only the first 8 characters are kept (presumably YYYYMMDD, as in
        # the test expectation above).
        upload_date = (stream_info.get('mvSvcOpenDt') or '')[:8] or None

        return {
            'id': video_id,
            'title': title,
            'artist': artist,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats
        }
|
@ -22,7 +22,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MixcloudIE(InfoExtractor):
|
class MixcloudIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
|
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
|
||||||
IE_NAME = 'mixcloud'
|
IE_NAME = 'mixcloud'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -51,6 +51,9 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
||||||
|
@ -78,11 +78,6 @@ class MSNIE(InfoExtractor):
|
|||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
format_url, display_id, 'mp4',
|
format_url, display_id, 'mp4',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
# Despite metadata in m3u8 all video+audio formats are
|
|
||||||
# actually video-only (no audio)
|
|
||||||
for f in m3u8_formats:
|
|
||||||
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
|
|
||||||
f['acodec'] = 'none'
|
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
||||||
'md5': 'af1adfa51312291a017720403826bb64',
|
'md5': 'af1adfa51312291a017720403826bb64',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '269389891880',
|
'id': 'p_tweet_snow_140529',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||||
@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||||
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '394064451844',
|
'id': 'nn_netcast_150204',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||||
@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
||||||
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '529953347624',
|
'id': 'x_lon_vwhorn_150922',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
||||||
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
||||||
@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||||
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '669831235788',
|
'id': 'tdy_al_space_160420',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'See the aurora borealis from space in stunning new NASA video',
|
'title': 'See the aurora borealis from space in stunning new NASA video',
|
||||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||||
@ -271,7 +272,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '314487875924',
|
'id': 'n_hayes_Aimm_140801_272214',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The chaotic GOP immigration vote',
|
'title': 'The chaotic GOP immigration vote',
|
||||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||||
@ -279,7 +280,6 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'timestamp': 1406937606,
|
'timestamp': 1406937606,
|
||||||
'upload_date': '20140802',
|
'upload_date': '20140802',
|
||||||
'uploader': 'NBCU-NEWS',
|
'uploader': 'NBCU-NEWS',
|
||||||
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
else:
|
else:
|
||||||
# "feature" and "nightly-news" pages use theplatform.com
|
# "feature" and "nightly-news" pages use theplatform.com
|
||||||
video_id = mobj.group('mpx_id')
|
video_id = mobj.group('mpx_id')
|
||||||
if not video_id.isdigit():
|
webpage = self._download_webpage(url, video_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
info = None
|
filter_param = 'byId'
|
||||||
bootstrap_json = self._search_regex(
|
bootstrap_json = self._search_regex(
|
||||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
|
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
|
||||||
webpage, 'bootstrap json', default=None)
|
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
|
||||||
|
webpage, 'bootstrap json', default=None)
|
||||||
|
if bootstrap_json:
|
||||||
bootstrap = self._parse_json(
|
bootstrap = self._parse_json(
|
||||||
bootstrap_json, video_id, transform_source=unescapeHTML)
|
bootstrap_json, video_id, transform_source=unescapeHTML)
|
||||||
|
|
||||||
|
info = None
|
||||||
if 'results' in bootstrap:
|
if 'results' in bootstrap:
|
||||||
info = bootstrap['results'][0]['video']
|
info = bootstrap['results'][0]['video']
|
||||||
elif 'video' in bootstrap:
|
elif 'video' in bootstrap:
|
||||||
info = bootstrap['video']
|
info = bootstrap['video']
|
||||||
|
elif 'msnbcVideoInfo' in bootstrap:
|
||||||
|
info = bootstrap['msnbcVideoInfo']['meta']
|
||||||
|
elif 'msnbcThePlatform' in bootstrap:
|
||||||
|
info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
|
||||||
else:
|
else:
|
||||||
info = bootstrap
|
info = bootstrap
|
||||||
video_id = info['mpxId']
|
|
||||||
|
if 'guid' in info:
|
||||||
|
video_id = info['guid']
|
||||||
|
filter_param = 'byGuid'
|
||||||
|
elif 'mpxId' in info:
|
||||||
|
video_id = info['mpxId']
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||||
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
|
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
|
||||||
'ie_key': 'ThePlatformFeed',
|
'ie_key': 'ThePlatformFeed',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
conviva = data.get('convivaStatistics') or {}
|
||||||
|
live = (data.get('mediaElementType') == 'Live' or
|
||||||
|
data.get('isLive') is True or conviva.get('isLive'))
|
||||||
|
|
||||||
|
def make_title(t):
|
||||||
|
return self._live_title(t) if live else t
|
||||||
|
|
||||||
media_assets = data.get('mediaAssets')
|
media_assets = data.get('mediaAssets')
|
||||||
if media_assets and isinstance(media_assets, list):
|
if media_assets and isinstance(media_assets, list):
|
||||||
def video_id_and_title(idx):
|
def video_id_and_title(idx):
|
||||||
@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor):
|
|||||||
if not formats:
|
if not formats:
|
||||||
continue
|
continue
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# Some f4m streams may not work with hdcore in fragments' URLs
|
||||||
|
for f in formats:
|
||||||
|
extra_param = f.get('extra_param_to_segment_url')
|
||||||
|
if extra_param and 'hdcore' in extra_param:
|
||||||
|
del f['extra_param_to_segment_url']
|
||||||
|
|
||||||
entry_id, entry_title = video_id_and_title(num)
|
entry_id, entry_title = video_id_and_title(num)
|
||||||
duration = parse_duration(asset.get('duration'))
|
duration = parse_duration(asset.get('duration'))
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': asset.get('carrierId') or entry_id,
|
'id': asset.get('carrierId') or entry_id,
|
||||||
'title': entry_title,
|
'title': make_title(entry_title),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor):
|
|||||||
duration = parse_duration(data.get('duration'))
|
duration = parse_duration(data.get('duration'))
|
||||||
entries = [{
|
entries = [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': make_title(title),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}]
|
}]
|
||||||
@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor):
|
|||||||
message_type, message_type)),
|
message_type, message_type)),
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
conviva = data.get('convivaStatistics') or {}
|
|
||||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||||
|
|
||||||
@ -260,6 +273,19 @@ class NRKTVIE(NRKBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class NRKTVDirekteIE(NRKTVIE):
    # Only the description, URL pattern and test cases are overridden;
    # everything else is inherited from NRKTVIE.
    IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
    _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'

    # Both entries only check that the URL is matched by _VALID_URL.
    _TESTS = [
        {
            'url': 'https://tv.nrk.no/direkte/nrk1',
            'only_matching': True,
        },
        {
            'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
            'only_matching': True,
        },
    ]
|
||||||
|
|
||||||
|
|
||||||
class NRKPlaylistIE(InfoExtractor):
|
class NRKPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
60
youtube_dl/extractor/ondemandkorea.py
Normal file
60
youtube_dl/extractor/ondemandkorea.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .jwplatform import JWPlatformBaseIE
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class OnDemandKoreaIE(JWPlatformBaseIE):
    """Information extractor for ondemandkorea.com episode pages."""

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
        'info_dict': {
            'id': 'ask-us-anything-e43',
            'ext': 'mp4',
            'title': 'Ask Us Anything : E43',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': 'm3u8 download'
        }
    }

    def _real_extract(self, url):
        """Extract the JW Player configuration embedded in the page at *url*.

        Raises ExtractorError when the page cannot be downloaded or the
        video is reserved for ODK PLUS members, and reports a geo
        restriction when the page shows the region block image.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id, fatal=False)
        if not webpage:
            # Page sometimes returns captcha page with HTTP 403
            raise ExtractorError(
                'Unable to access page. You may have been blocked.',
                expected=True)

        if 'msg_block_01.png' in webpage:
            self.raise_geo_restricted(
                'This content is not available in your region')

        # The page text doubles as the error message shown to the user.
        members_only = 'This video is only available to ODK PLUS members.'
        if members_only in webpage:
            raise ExtractorError(members_only, expected=True)

        title = self._og_search_title(webpage)

        # Argument of jwplayer(<id>).setup(...), a JavaScript object literal
        setup_options = self._search_regex(
            r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
            webpage, 'jw config', group='options')
        jw_config = self._parse_json(
            setup_options, video_id, transform_source=js_to_json)

        info = self._parse_jwplayer_data(
            jw_config, video_id, require_title=False, m3u8_id='hls',
            base_url=url)

        # Title and thumbnail come from the page's Open Graph metadata.
        thumbnail = self._og_search_thumbnail(webpage)
        info['title'] = title
        info['thumbnail'] = thumbnail
        return info
|
@ -1,25 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals, division
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_chr
|
||||||
compat_chr,
|
|
||||||
compat_ord,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
from ..jsinterp import (
|
|
||||||
JSInterpreter,
|
|
||||||
_NAME_RE
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
_VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||||
@ -60,46 +51,11 @@ class OpenloadIE(InfoExtractor):
|
|||||||
# for title and ext
|
# for title and ext
|
||||||
'url': 'https://openload.co/embed/Sxz5sADo82g/',
|
'url': 'https://openload.co/embed/Sxz5sADo82g/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def openload_decode(self, txt):
|
|
||||||
symbol_dict = {
|
|
||||||
'(゚Д゚) [゚Θ゚]': '_',
|
|
||||||
'(゚Д゚) [゚ω゚ノ]': 'a',
|
|
||||||
'(゚Д゚) [゚Θ゚ノ]': 'b',
|
|
||||||
'(゚Д゚) [\'c\']': 'c',
|
|
||||||
'(゚Д゚) [゚ー゚ノ]': 'd',
|
|
||||||
'(゚Д゚) [゚Д゚ノ]': 'e',
|
|
||||||
'(゚Д゚) [1]': 'f',
|
|
||||||
'(゚Д゚) [\'o\']': 'o',
|
|
||||||
'(o゚ー゚o)': 'u',
|
|
||||||
'(゚Д゚) [\'c\']': 'c',
|
|
||||||
'((゚ー゚) + (o^_^o))': '7',
|
|
||||||
'((o^_^o) +(o^_^o) +(c^_^o))': '6',
|
|
||||||
'((゚ー゚) + (゚Θ゚))': '5',
|
|
||||||
'(-~3)': '4',
|
|
||||||
'(-~-~1)': '3',
|
|
||||||
'(-~1)': '2',
|
|
||||||
'(-~0)': '1',
|
|
||||||
'((c^_^o)-(c^_^o))': '0',
|
|
||||||
}
|
|
||||||
delim = '(゚Д゚)[゚ε゚]+'
|
|
||||||
end_token = '(゚Д゚)[゚o゚]'
|
|
||||||
symbols = '|'.join(map(re.escape, symbol_dict.keys()))
|
|
||||||
txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
|
|
||||||
ret = ''
|
|
||||||
for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
|
|
||||||
for aachar in aacode.split(delim):
|
|
||||||
if aachar.isdigit():
|
|
||||||
ret += compat_chr(int(aachar, 8))
|
|
||||||
else:
|
|
||||||
m = re.match(r'^u([\da-f]{4})$', aachar)
|
|
||||||
if m:
|
|
||||||
ret += compat_chr(int(m.group(1), 16))
|
|
||||||
else:
|
|
||||||
self.report_warning("Cannot decode: %s" % aachar)
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
|
||||||
@ -107,36 +63,20 @@ class OpenloadIE(InfoExtractor):
|
|||||||
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
if 'File not found' in webpage or 'deleted by the owner' in webpage:
|
||||||
raise ExtractorError('File not found', expected=True)
|
raise ExtractorError('File not found', expected=True)
|
||||||
|
|
||||||
# The following decryption algorithm is written by @yokrysty and
|
ol_id = self._search_regex(
|
||||||
# declared to be freely used in youtube-dl
|
'<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>',
|
||||||
# See https://github.com/rg3/youtube-dl/issues/10408
|
webpage, 'openload ID')
|
||||||
enc_data = self._html_search_regex(
|
|
||||||
r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
|
|
||||||
webpage, 'encrypted data')
|
|
||||||
|
|
||||||
enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
|
first_two_chars = int(float(ol_id[0:][:2]))
|
||||||
webpage, 'encrypted code')
|
urlcode = ''
|
||||||
|
num = 2
|
||||||
|
|
||||||
js_code = self.openload_decode(enc_code)
|
while num < len(ol_id):
|
||||||
jsi = JSInterpreter(js_code)
|
urlcode += compat_chr(int(float(ol_id[num:][:3])) -
|
||||||
|
first_two_chars * int(float(ol_id[num + 3:][:2])))
|
||||||
|
num += 5
|
||||||
|
|
||||||
m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
|
video_url = 'https://openload.co/stream/' + urlcode
|
||||||
m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
|
|
||||||
|
|
||||||
offset = jsi.call_function(m_offset_fun)
|
|
||||||
diff = jsi.call_function(m_diff_fun)
|
|
||||||
|
|
||||||
video_url_chars = []
|
|
||||||
|
|
||||||
for idx, c in enumerate(enc_data):
|
|
||||||
j = compat_ord(c)
|
|
||||||
if j >= 33 and j <= 126:
|
|
||||||
j = ((j + 14) % 94) + 33
|
|
||||||
if idx == len(enc_data) - offset:
|
|
||||||
j += diff
|
|
||||||
video_url_chars += compat_chr(j)
|
|
||||||
|
|
||||||
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||||
@ -155,5 +95,4 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'ext': determine_ext(title),
|
'ext': determine_ext(title),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
return info_dict
|
return info_dict
|
||||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -56,6 +57,22 @@ class PandoraTVIE(InfoExtractor):
|
|||||||
r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
|
r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
|
||||||
if not height:
|
if not height:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
play_url = self._download_json(
|
||||||
|
'http://m.pandora.tv/?c=api&m=play_url', video_id,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'prgid': video_id,
|
||||||
|
'runtime': info.get('runtime'),
|
||||||
|
'vod_url': format_url,
|
||||||
|
}),
|
||||||
|
headers={
|
||||||
|
'Origin': url,
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
})
|
||||||
|
format_url = play_url.get('url')
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '%sp' % height,
|
'format_id': '%sp' % height,
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@ -350,6 +350,15 @@ class PBSIE(InfoExtractor):
|
|||||||
410: 'This video has expired and is no longer available for online streaming.',
|
410: 'This video has expired and is no longer available for online streaming.',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
cookie = (self._download_json(
|
||||||
|
'http://localization.services.pbs.org/localize/auto/cookie/',
|
||||||
|
None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
|
||||||
|
if cookie:
|
||||||
|
station = self._search_regex(r'#?s=\["([^"]+)"', cookie, 'station')
|
||||||
|
if station:
|
||||||
|
self._set_cookie('.pbs.org', 'pbsol.station', station)
|
||||||
|
|
||||||
def _extract_webpage(self, url):
|
def _extract_webpage(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
@ -476,7 +485,8 @@ class PBSIE(InfoExtractor):
|
|||||||
|
|
||||||
redirect_info = self._download_json(
|
redirect_info = self._download_json(
|
||||||
'%s?format=json' % redirect['url'], display_id,
|
'%s?format=json' % redirect['url'], display_id,
|
||||||
'Downloading %s video url info' % (redirect_id or num))
|
'Downloading %s video url info' % (redirect_id or num),
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
|
||||||
if redirect_info['status'] == 'error':
|
if redirect_info['status'] == 'error':
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
|
106
youtube_dl/extractor/piksel.py
Normal file
106
youtube_dl/extractor/piksel.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PikselIE(InfoExtractor):
    """Information extractor for videos served by player.piksel.com."""

    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'http://player.piksel.com/v/nv60p12f',
        'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
        'info_dict': {
            'id': 'nv60p12f',
            'ext': 'mp4',
            'title': 'فن الحياة - الحلقة 1',
            'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
            'timestamp': 1465231790,
            'upload_date': '20160606',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first Piksel player iframe in *webpage*.

        Returns None when the page embeds no such iframe.
        """
        mobj = re.search(
            r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Build the info dict for the Piksel video identified by *url*.

        The player page supplies the API token used to query the program
        web service.  Formats are collected from the HLS playlist URL (if
        any) and from every asset file that exposes an ``http_url``.

        Raises ExtractorError with the service's reason when the response
        reports a failure.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        app_token = self._search_regex(
            r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
        response = self._download_json(
            'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
            video_id, query={
                'v': video_id
            })['response']
        failure = response.get('failure')
        if failure:
            raise ExtractorError(failure['reason'], expected=True)
        video_data = response['WsProgramResponse']['program']['asset']
        title = video_data['title']

        formats = []

        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
        # 'assetFiles' may be missing or null; treat both as "no files".
        for asset_file in video_data.get('assetFiles') or []:
            # TODO: extract rtmp formats
            http_url = asset_file.get('http_url')
            if not http_url:
                continue
            tbr = None
            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
            if asset_type == 'video':
                # int_or_none returns None for a missing bitrate and
                # None + int raises TypeError, so the total is only
                # computed when both components are known.
                if vbr is not None and abr is not None:
                    tbr = vbr + abr
            elif asset_type == 'audio':
                tbr = abr

            format_id = ['http']
            if tbr:
                format_id.append(compat_str(tbr))

            formats.append({
                'format_id': '-'.join(format_id),
                'url': unescapeHTML(http_url),
                'vbr': vbr,
                'abr': abr,
                'width': int_or_none(asset_file.get('videoWidth')),
                'height': int_or_none(asset_file.get('videoHeight')),
                'filesize': int_or_none(asset_file.get('filesize')),
                'tbr': tbr,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_data.get('dateadd')),
            'formats': formats,
        }
|
@ -85,6 +85,9 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
source_url, clip_id, 'mp4', 'm3u8_native',
|
source_url, clip_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif mimetype == 'application/dash+xml':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
source_url, clip_id, mpd_id='dash', fatal=False))
|
||||||
else:
|
else:
|
||||||
tbr = fix_bitrate(source['bitrate'])
|
tbr = fix_bitrate(source['bitrate'])
|
||||||
if protocol in ('rtmp', 'rtmpe'):
|
if protocol in ('rtmp', 'rtmpe'):
|
||||||
|
@ -4,118 +4,31 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RteIE(InfoExtractor):
|
class RteBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'rte'
|
|
||||||
IE_DESC = 'Raidió Teilifís Éireann TV'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10478715',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Watch iWitness online',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
|
|
||||||
'duration': 60.046,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'f4m fails with --test atm'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
description = self._html_search_meta('description', webpage, 'description')
|
|
||||||
duration = float_or_none(self._html_search_meta(
|
|
||||||
'duration', webpage, 'duration', fatal=False), 1000)
|
|
||||||
|
|
||||||
thumbnail = None
|
|
||||||
thumbnail_meta = self._html_search_meta('thumbnail', webpage)
|
|
||||||
if thumbnail_meta:
|
|
||||||
thumbnail_id = self._search_regex(
|
|
||||||
r'uri:irus:(.+)', thumbnail_meta,
|
|
||||||
'thumbnail id', fatal=False)
|
|
||||||
if thumbnail_id:
|
|
||||||
thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id
|
|
||||||
|
|
||||||
feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id
|
|
||||||
json_string = self._download_json(feeds_url, video_id)
|
|
||||||
|
|
||||||
# f4m_url = server + relative_url
|
|
||||||
f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
|
|
||||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
|
||||||
self._sort_formats(f4m_formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': f4m_formats,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RteRadioIE(InfoExtractor):
|
|
||||||
IE_NAME = 'rte:radio'
|
|
||||||
IE_DESC = 'Raidió Teilifís Éireann radio'
|
|
||||||
# Radioplayer URLs have two distinct specifier formats,
|
|
||||||
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
|
||||||
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
|
|
||||||
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
|
||||||
# An <id> uniquely defines an individual recording, and is the only part we require.
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
# Old-style player URL; HLS and RTMPE formats
|
|
||||||
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '10507902',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Gloria',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
|
|
||||||
'timestamp': 1451203200,
|
|
||||||
'upload_date': '20151227',
|
|
||||||
'duration': 7230.0,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'f4m fails with --test atm'
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# New-style player URL; RTMPE formats only
|
|
||||||
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '3250678',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'The Lyric Concert with Paul Herriott',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
'description': '',
|
|
||||||
'timestamp': 1333742400,
|
|
||||||
'upload_date': '20120406',
|
|
||||||
'duration': 7199.016,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'f4m fails with --test atm'
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
item_id = self._match_id(url)
|
||||||
|
|
||||||
json_string = self._download_json(
|
try:
|
||||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
|
json_string = self._download_json(
|
||||||
item_id)
|
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
|
||||||
|
item_id)
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||||
|
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||||
|
if error_info:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
# NB the string values in the JSON are stored using XML escaping(!)
|
# NB the string values in the JSON are stored using XML escaping(!)
|
||||||
show = json_string['shows'][0]
|
show = json_string['shows'][0]
|
||||||
@ -163,3 +76,67 @@ class RteRadioIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RteIE(RteBaseIE):
|
||||||
|
IE_NAME = 'rte'
|
||||||
|
IE_DESC = 'Raidió Teilifís Éireann TV'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
|
||||||
|
'md5': '4a76eb3396d98f697e6e8110563d2604',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10478715',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'iWitness',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': 'The spirit of Ireland, one voice and one minute at a time.',
|
||||||
|
'duration': 60.046,
|
||||||
|
'upload_date': '20151012',
|
||||||
|
'timestamp': 1444694160,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RteRadioIE(RteBaseIE):
|
||||||
|
IE_NAME = 'rte:radio'
|
||||||
|
IE_DESC = 'Raidió Teilifís Éireann radio'
|
||||||
|
# Radioplayer URLs have two distinct specifier formats,
|
||||||
|
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||||
|
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
|
||||||
|
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
||||||
|
# An <id> uniquely defines an individual recording, and is the only part we require.
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# Old-style player URL; HLS and RTMPE formats
|
||||||
|
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
||||||
|
'md5': 'c79ccb2c195998440065456b69760411',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10507902',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Gloria',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
|
||||||
|
'timestamp': 1451203200,
|
||||||
|
'upload_date': '20151227',
|
||||||
|
'duration': 7230.0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# New-style player URL; RTMPE formats only
|
||||||
|
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3250678',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Lyric Concert with Paul Herriott',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': '',
|
||||||
|
'timestamp': 1333742400,
|
||||||
|
'upload_date': '20120406',
|
||||||
|
'duration': 7199.016,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
@ -2,7 +2,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class RTL2IE(InfoExtractor):
|
class RTL2IE(InfoExtractor):
|
||||||
@ -13,7 +15,7 @@ class RTL2IE(InfoExtractor):
|
|||||||
'id': 'folge-203-0',
|
'id': 'folge-203-0',
|
||||||
'ext': 'f4v',
|
'ext': 'f4v',
|
||||||
'title': 'GRIP sucht den Sommerkönig',
|
'title': 'GRIP sucht den Sommerkönig',
|
||||||
'description': 'Matthias, Det und Helge treten gegeneinander an.'
|
'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
@ -25,7 +27,7 @@ class RTL2IE(InfoExtractor):
|
|||||||
'id': '21040-anna-erwischt-alex',
|
'id': '21040-anna-erwischt-alex',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Anna erwischt Alex!',
|
'title': 'Anna erwischt Alex!',
|
||||||
'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
|
'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
@ -52,34 +54,47 @@ class RTL2IE(InfoExtractor):
|
|||||||
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
|
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
|
||||||
vivi_id = self._html_search_regex(
|
vivi_id = self._html_search_regex(
|
||||||
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
|
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
|
||||||
info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
|
|
||||||
|
|
||||||
info = self._download_json(info_url, video_id)
|
info = self._download_json(
|
||||||
|
'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php',
|
||||||
|
video_id, query={
|
||||||
|
'vico_id': vico_id,
|
||||||
|
'vivi_id': vivi_id,
|
||||||
|
})
|
||||||
video_info = info['video']
|
video_info = info['video']
|
||||||
title = video_info['titel']
|
title = video_info['titel']
|
||||||
description = video_info.get('beschreibung')
|
|
||||||
thumbnail = video_info.get('image')
|
|
||||||
|
|
||||||
download_url = video_info['streamurl']
|
formats = []
|
||||||
download_url = download_url.replace('\\', '')
|
|
||||||
stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
|
rtmp_url = video_info.get('streamurl')
|
||||||
rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
|
if rtmp_url:
|
||||||
|
rtmp_url = rtmp_url.replace('\\', '')
|
||||||
|
stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
|
||||||
|
rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
'url': rtmp_url,
|
||||||
|
'play_path': stream_url,
|
||||||
|
'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
|
||||||
|
'page_url': url,
|
||||||
|
'flash_version': 'LNX 11,2,202,429',
|
||||||
|
'rtmp_conn': rtmp_conn,
|
||||||
|
'no_resume': True,
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
m3u8_url = video_info.get('streamurl_hls')
|
||||||
|
if m3u8_url:
|
||||||
|
formats.extend(self._extract_akamai_formats(m3u8_url, video_id))
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': download_url,
|
|
||||||
'play_path': stream_url,
|
|
||||||
'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
|
|
||||||
'page_url': url,
|
|
||||||
'flash_version': 'LNX 11,2,202,429',
|
|
||||||
'rtmp_conn': rtmp_conn,
|
|
||||||
'no_resume': True,
|
|
||||||
}]
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': video_info.get('image'),
|
||||||
'description': description,
|
'description': video_info.get('beschreibung'),
|
||||||
|
'duration': int_or_none(video_info.get('duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -4,27 +4,24 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .srgssr import SRGSSRIE
|
from .srgssr import SRGSSRIE
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
xpath_text,
|
determine_ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RTSIE(SRGSSRIE):
|
class RTSIE(SRGSSRIE):
|
||||||
IE_DESC = 'RTS.ch'
|
IE_DESC = 'RTS.ch'
|
||||||
_VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
|
_VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
||||||
'md5': 'f254c4b26fb1d3c183793d52bc40d3e7',
|
'md5': 'ff7f8450a90cf58dacb64e29707b4a8e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3449373',
|
'id': '3449373',
|
||||||
'display_id': 'les-enfants-terribles',
|
'display_id': 'les-enfants-terribles',
|
||||||
@ -38,35 +35,17 @@ class RTSIE(SRGSSRIE):
|
|||||||
'thumbnail': 're:^https?://.*\.image',
|
'thumbnail': 're:^https?://.*\.image',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
|
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
|
||||||
'md5': 'f1077ac5af686c76528dc8d7c5df29ba',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5742494',
|
'id': '5624065',
|
||||||
'display_id': '5742494',
|
'title': 'Passe-moi les jumelles',
|
||||||
'ext': 'mp4',
|
|
||||||
'duration': 3720,
|
|
||||||
'title': 'Les yeux dans les cieux - Mon homard au Canada',
|
|
||||||
'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
|
|
||||||
'uploader': 'Passe-moi les jumelles',
|
|
||||||
'upload_date': '20140404',
|
|
||||||
'timestamp': 1396635300,
|
|
||||||
'thumbnail': 're:^https?://.*\.image',
|
|
||||||
'view_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'playlist_mincount': 4,
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
|
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
|
||||||
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5745975',
|
'id': '5745975',
|
||||||
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
|
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
|
||||||
@ -80,11 +59,15 @@ class RTSIE(SRGSSRIE):
|
|||||||
'thumbnail': 're:^https?://.*\.image',
|
'thumbnail': 're:^https?://.*\.image',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
'skip': 'Blocked outside Switzerland',
|
'skip': 'Blocked outside Switzerland',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
|
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
|
||||||
'md5': '9f713382f15322181bb366cc8c3a4ff0',
|
'md5': '1bae984fe7b1f78e94abc74e802ed99f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5745356',
|
'id': '5745356',
|
||||||
'display_id': 'londres-cachee-par-un-epais-smog',
|
'display_id': 'londres-cachee-par-un-epais-smog',
|
||||||
@ -92,16 +75,12 @@ class RTSIE(SRGSSRIE):
|
|||||||
'duration': 33,
|
'duration': 33,
|
||||||
'title': 'Londres cachée par un épais smog',
|
'title': 'Londres cachée par un épais smog',
|
||||||
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
|
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
|
||||||
'uploader': 'Le Journal en continu',
|
'uploader': 'L\'actu en vidéo',
|
||||||
'upload_date': '20140403',
|
'upload_date': '20140403',
|
||||||
'timestamp': 1396537322,
|
'timestamp': 1396537322,
|
||||||
'thumbnail': 're:^https?://.*\.image',
|
'thumbnail': 're:^https?://.*\.image',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
|
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
|
||||||
@ -125,6 +104,10 @@ class RTSIE(SRGSSRIE):
|
|||||||
'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
|
'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 5,
|
'playlist_mincount': 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -142,19 +125,32 @@ class RTSIE(SRGSSRIE):
|
|||||||
|
|
||||||
# media_id extracted out of URL is not always a real id
|
# media_id extracted out of URL is not always a real id
|
||||||
if 'video' not in all_info and 'audio' not in all_info:
|
if 'video' not in all_info and 'audio' not in all_info:
|
||||||
page = self._download_webpage(url, display_id)
|
entries = []
|
||||||
|
|
||||||
# article with videos on rhs
|
for item in all_info.get('items', []):
|
||||||
videos = re.findall(
|
item_url = item.get('url')
|
||||||
r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
|
if not item_url:
|
||||||
page)
|
continue
|
||||||
if not videos:
|
entries.append(self.url_result(item_url, 'RTS'))
|
||||||
|
|
||||||
|
if not entries:
|
||||||
|
page, urlh = self._download_webpage_handle(url, display_id)
|
||||||
|
if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id:
|
||||||
|
return self.url_result(urlh.geturl(), 'RTS')
|
||||||
|
|
||||||
|
# article with videos on rhs
|
||||||
videos = re.findall(
|
videos = re.findall(
|
||||||
r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
|
r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"',
|
||||||
page)
|
page)
|
||||||
if videos:
|
if not videos:
|
||||||
entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
|
videos = re.findall(
|
||||||
return self.playlist_result(entries, media_id, self._og_search_title(page))
|
r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"',
|
||||||
|
page)
|
||||||
|
if videos:
|
||||||
|
entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos]
|
||||||
|
|
||||||
|
if entries:
|
||||||
|
return self.playlist_result(entries, media_id, all_info.get('title'))
|
||||||
|
|
||||||
internal_id = self._html_search_regex(
|
internal_id = self._html_search_regex(
|
||||||
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
||||||
@ -168,36 +164,29 @@ class RTSIE(SRGSSRIE):
|
|||||||
|
|
||||||
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
|
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
|
||||||
|
|
||||||
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
|
title = info['title']
|
||||||
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
|
|
||||||
if isinstance(duration, compat_str):
|
|
||||||
duration = parse_duration(duration)
|
|
||||||
view_count = info.get('plays')
|
|
||||||
thumbnail = unescapeHTML(info.get('preview_image_url'))
|
|
||||||
|
|
||||||
def extract_bitrate(url):
|
def extract_bitrate(url):
|
||||||
return int_or_none(self._search_regex(
|
return int_or_none(self._search_regex(
|
||||||
r'-([0-9]+)k\.', url, 'bitrate', default=None))
|
r'-([0-9]+)k\.', url, 'bitrate', default=None))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in info['streams'].items():
|
streams = info.get('streams', {})
|
||||||
if format_id == 'hds_sd' and 'hds' in info['streams']:
|
for format_id, format_url in streams.items():
|
||||||
|
if format_id == 'hds_sd' and 'hds' in streams:
|
||||||
continue
|
continue
|
||||||
if format_id == 'hls_sd' and 'hls' in info['streams']:
|
if format_id == 'hls_sd' and 'hls' in streams:
|
||||||
continue
|
continue
|
||||||
if format_url.endswith('.f4m'):
|
ext = determine_ext(format_url)
|
||||||
token = self._download_xml(
|
if ext in ('m3u8', 'f4m'):
|
||||||
'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
|
format_url = self._get_tokenized_src(format_url, media_id, format_id)
|
||||||
media_id, 'Downloading %s token' % format_id)
|
if ext == 'f4m':
|
||||||
auth_params = xpath_text(token, './/authparams', 'auth params')
|
formats.extend(self._extract_f4m_formats(
|
||||||
if not auth_params:
|
format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
|
||||||
continue
|
media_id, f4m_id=format_id, fatal=False))
|
||||||
formats.extend(self._extract_f4m_formats(
|
else:
|
||||||
'%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
|
formats.extend(self._extract_m3u8_formats(
|
||||||
media_id, f4m_id=format_id, fatal=False))
|
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
|
||||||
elif format_url.endswith('.m3u8'):
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
|
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
@ -205,25 +194,37 @@ class RTSIE(SRGSSRIE):
|
|||||||
'tbr': extract_bitrate(format_url),
|
'tbr': extract_bitrate(format_url),
|
||||||
})
|
})
|
||||||
|
|
||||||
if 'media' in info:
|
for media in info.get('media', []):
|
||||||
formats.extend([{
|
media_url = media.get('url')
|
||||||
'format_id': '%s-%sk' % (media['ext'], media['rate']),
|
if not media_url or re.match(r'https?://', media_url):
|
||||||
'url': 'http://download-video.rts.ch/%s' % media['url'],
|
continue
|
||||||
'tbr': media['rate'] or extract_bitrate(media['url']),
|
rate = media.get('rate')
|
||||||
} for media in info['media'] if media.get('rate')])
|
ext = media.get('ext') or determine_ext(media_url, 'mp4')
|
||||||
|
format_id = ext
|
||||||
|
if rate:
|
||||||
|
format_id += '-%dk' % rate
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': 'http://download-video.rts.ch/' + media_url,
|
||||||
|
'tbr': rate or extract_bitrate(media_url),
|
||||||
|
})
|
||||||
|
|
||||||
self._check_formats(formats, media_id)
|
self._check_formats(formats, media_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
|
||||||
|
if isinstance(duration, compat_str):
|
||||||
|
duration = parse_duration(duration)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': info['title'],
|
'title': title,
|
||||||
'description': info.get('intro'),
|
'description': info.get('intro'),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': int_or_none(info.get('plays')),
|
||||||
'uploader': info.get('programName'),
|
'uploader': info.get('programName'),
|
||||||
'timestamp': upload_timestamp,
|
'timestamp': parse_iso8601(info.get('broadcast_date')),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': unescapeHTML(info.get('preview_image_url')),
|
||||||
}
|
}
|
||||||
|
@ -209,7 +209,10 @@ class RTVELiveIE(InfoExtractor):
|
|||||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
||||||
|
|
||||||
vidplayer_id = self._search_regex(
|
vidplayer_id = self._search_regex(
|
||||||
r'playerId=player([0-9]+)', webpage, 'internal video ID')
|
(r'playerId=player([0-9]+)',
|
||||||
|
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||||
|
r'data-id=["\'](\d+)'),
|
||||||
|
webpage, 'internal video ID')
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||||
m3u8_url = _decrypt_url(png)
|
m3u8_url = _decrypt_url(png)
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@ -23,6 +24,16 @@ class SRGSSRIE(InfoExtractor):
|
|||||||
'STARTDATE': 'This video is not yet available. Please try again later.',
|
'STARTDATE': 'This video is not yet available. Please try again later.',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _get_tokenized_src(self, url, video_id, format_id):
|
||||||
|
sp = compat_urllib_parse_urlparse(url).path.split('/')
|
||||||
|
token = self._download_json(
|
||||||
|
'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]),
|
||||||
|
video_id, 'Downloading %s token' % format_id, fatal=False) or {}
|
||||||
|
auth_params = token.get('token', {}).get('authparams')
|
||||||
|
if auth_params:
|
||||||
|
url += '?' + auth_params
|
||||||
|
return url
|
||||||
|
|
||||||
def get_media_data(self, bu, media_type, media_id):
|
def get_media_data(self, bu, media_type, media_id):
|
||||||
media_data = self._download_json(
|
media_data = self._download_json(
|
||||||
'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
|
'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
|
||||||
@ -61,14 +72,16 @@ class SRGSSRIE(InfoExtractor):
|
|||||||
asset_url = asset['text']
|
asset_url = asset['text']
|
||||||
quality = asset['@quality']
|
quality = asset['@quality']
|
||||||
format_id = '%s-%s' % (protocol, quality)
|
format_id = '%s-%s' % (protocol, quality)
|
||||||
if protocol == 'HTTP-HDS':
|
if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'):
|
||||||
formats.extend(self._extract_f4m_formats(
|
asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
|
||||||
asset_url + '?hdcore=3.4.0', media_id,
|
if protocol.startswith('HTTP-HDS'):
|
||||||
f4m_id=format_id, fatal=False))
|
formats.extend(self._extract_f4m_formats(
|
||||||
elif protocol == 'HTTP-HLS':
|
asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
|
||||||
formats.extend(self._extract_m3u8_formats(
|
media_id, f4m_id=format_id, fatal=False))
|
||||||
asset_url, media_id, 'mp4', 'm3u8_native',
|
elif protocol.startswith('HTTP-HLS'):
|
||||||
m3u8_id=format_id, fatal=False))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
asset_url, media_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
@ -94,10 +107,10 @@ class SRGSSRPlayIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
||||||
'md5': '4cd93523723beff51bb4bee974ee238d',
|
'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
|
||||||
'ext': 'm4v',
|
'ext': 'mp4',
|
||||||
'upload_date': '20130701',
|
'upload_date': '20130701',
|
||||||
'title': 'Snowden beantragt Asyl in Russland',
|
'title': 'Snowden beantragt Asyl in Russland',
|
||||||
'timestamp': 1372713995,
|
'timestamp': 1372713995,
|
||||||
|
@ -33,7 +33,9 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
|||||||
|
|
||||||
class ThePlatformBaseIE(OnceIE):
|
class ThePlatformBaseIE(OnceIE):
|
||||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||||
meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
|
meta = self._download_xml(
|
||||||
|
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
||||||
if error_element is not None and error_element.attrib['src'].startswith(
|
if error_element is not None and error_element.attrib['src'].startswith(
|
||||||
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
|
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
|
||||||
|
@ -22,6 +22,7 @@ from ..utils import (
|
|||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -279,6 +280,18 @@ class TwitchVodIE(TwitchItemBaseIE):
|
|||||||
if 't' in query:
|
if 't' in query:
|
||||||
info['start_time'] = parse_duration(query['t'][0])
|
info['start_time'] = parse_duration(query['t'][0])
|
||||||
|
|
||||||
|
if info.get('timestamp') is not None:
|
||||||
|
info['subtitles'] = {
|
||||||
|
'rechat': [{
|
||||||
|
'url': update_url_query(
|
||||||
|
'https://rechat.twitch.tv/rechat-messages', {
|
||||||
|
'video_id': 'v%s' % item_id,
|
||||||
|
'start': info['timestamp'],
|
||||||
|
}),
|
||||||
|
'ext': 'json',
|
||||||
|
}],
|
||||||
|
}
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
@ -300,7 +313,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
|
|||||||
response = self._call_api(
|
response = self._call_api(
|
||||||
self._PLAYLIST_PATH % (channel_id, offset, limit),
|
self._PLAYLIST_PATH % (channel_id, offset, limit),
|
||||||
channel_id,
|
channel_id,
|
||||||
'Downloading %s videos JSON page %s'
|
'Downloading %s JSON page %s'
|
||||||
% (self._PLAYLIST_TYPE, counter_override or counter))
|
% (self._PLAYLIST_TYPE, counter_override or counter))
|
||||||
page_entries = self._extract_playlist_page(response)
|
page_entries = self._extract_playlist_page(response)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
@ -350,19 +363,72 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
|
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
|
||||||
IE_NAME = 'twitch:past_broadcasts'
|
_VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
|
||||||
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
_PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
|
||||||
_PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true'
|
|
||||||
_PLAYLIST_TYPE = 'past broadcasts'
|
|
||||||
|
class TwitchAllVideosIE(TwitchVideosBaseIE):
|
||||||
|
IE_NAME = 'twitch:videos:all'
|
||||||
|
_VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
|
||||||
|
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
|
||||||
|
_PLAYLIST_TYPE = 'all videos'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.twitch.tv/spamfish/profile/past_broadcasts',
|
'url': 'https://www.twitch.tv/spamfish/videos/all',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'spamfish',
|
'id': 'spamfish',
|
||||||
'title': 'Spamfish',
|
'title': 'Spamfish',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 54,
|
'playlist_mincount': 869,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TwitchUploadsIE(TwitchVideosBaseIE):
|
||||||
|
IE_NAME = 'twitch:videos:uploads'
|
||||||
|
_VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
|
||||||
|
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
|
||||||
|
_PLAYLIST_TYPE = 'uploads'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.twitch.tv/spamfish/videos/uploads',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'spamfish',
|
||||||
|
'title': 'Spamfish',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
|
||||||
|
IE_NAME = 'twitch:videos:past-broadcasts'
|
||||||
|
_VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
|
||||||
|
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
|
||||||
|
_PLAYLIST_TYPE = 'past broadcasts'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'spamfish',
|
||||||
|
'title': 'Spamfish',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TwitchHighlightsIE(TwitchVideosBaseIE):
|
||||||
|
IE_NAME = 'twitch:videos:highlights'
|
||||||
|
_VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
|
||||||
|
_PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
|
||||||
|
_PLAYLIST_TYPE = 'highlights'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.twitch.tv/spamfish/videos/highlights',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'spamfish',
|
||||||
|
'title': 'Spamfish',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 805,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
33
youtube_dl/extractor/uktvplay.py
Normal file
33
youtube_dl/extractor/uktvplay.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class UKTVPlayIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||||
|
'md5': '',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2117008346001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Pincers',
|
||||||
|
'description': 'Pincers',
|
||||||
|
'uploader_id': '1242911124001',
|
||||||
|
'upload_date': '20130124',
|
||||||
|
'timestamp': 1359049267,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to download MPD manifest']
|
||||||
|
}
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % video_id,
|
||||||
|
'BrightcoveNew', video_id)
|
@ -30,7 +30,9 @@ class UplynkIE(InfoExtractor):
|
|||||||
def _extract_uplynk_info(self, uplynk_content_url):
|
def _extract_uplynk_info(self, uplynk_content_url):
|
||||||
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
|
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
|
||||||
display_id = video_id or external_id
|
display_id = video_id or external_id
|
||||||
formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
|
formats = self._extract_m3u8_formats(
|
||||||
|
'http://content.uplynk.com/%s.m3u8' % path,
|
||||||
|
display_id, 'mp4', 'm3u8_native')
|
||||||
if session_id:
|
if session_id:
|
||||||
for f in formats:
|
for f in formats:
|
||||||
f['extra_param_to_segment_url'] = 'pbs=' + session_id
|
f['extra_param_to_segment_url'] = 'pbs=' + session_id
|
||||||
|
@ -4,11 +4,22 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import urlencode_postdata
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class Vbox7IE(InfoExtractor):
|
class Vbox7IE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P<id>[\da-fA-F]+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:[^/]+\.)?vbox7\.com/
|
||||||
|
(?:
|
||||||
|
play:|
|
||||||
|
(?:
|
||||||
|
emb/external\.php|
|
||||||
|
player/ext\.swf
|
||||||
|
)\?.*?\bvid=
|
||||||
|
)
|
||||||
|
(?P<id>[\da-fA-F]+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vbox7.com/play:0946fff23c',
|
'url': 'http://vbox7.com/play:0946fff23c',
|
||||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
||||||
@ -16,6 +27,14 @@ class Vbox7IE(InfoExtractor):
|
|||||||
'id': '0946fff23c',
|
'id': '0946fff23c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Борисов: Притеснен съм за бъдещето на България',
|
'title': 'Борисов: Притеснен съм за бъдещето на България',
|
||||||
|
'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1470982814,
|
||||||
|
'upload_date': '20160812',
|
||||||
|
'uploader': 'zdraveibulgaria',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'proxy': '127.0.0.1:8118',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vbox7.com/play:249bb972c2',
|
'url': 'http://vbox7.com/play:249bb972c2',
|
||||||
@ -29,6 +48,9 @@ class Vbox7IE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -42,33 +64,41 @@ class Vbox7IE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
response = self._download_json(
|
||||||
'http://vbox7.com/play:%s' % video_id, video_id)
|
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
if 'error' in response:
|
||||||
r'<title>(.+?)</title>', webpage, 'title').split('/')[0].strip()
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
video = response['options']
|
||||||
r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
|
|
||||||
webpage, 'video url', default=None, group='url')
|
|
||||||
|
|
||||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
title = video['title']
|
||||||
|
video_url = video['src']
|
||||||
if not video_url:
|
|
||||||
info_response = self._download_webpage(
|
|
||||||
'http://vbox7.com/play/magare.do', video_id,
|
|
||||||
'Downloading info webpage',
|
|
||||||
data=urlencode_postdata({'as3': '1', 'vid': video_id}),
|
|
||||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
|
||||||
final_url, thumbnail_url = map(
|
|
||||||
lambda x: x.split('=')[1], info_response.split('&'))
|
|
||||||
|
|
||||||
if '/na.mp4' in video_url:
|
if '/na.mp4' in video_url:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
return {
|
uploader = video.get('uploader')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
|
||||||
|
|
||||||
|
info = {}
|
||||||
|
|
||||||
|
if webpage:
|
||||||
|
info = self._search_json_ld(
|
||||||
|
webpage.replace('"/*@context"', '"@context"'), video_id,
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': self._proto_relative_url(video_url, 'http:'),
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'url': video_url,
|
||||||
}
|
'uploader': uploader,
|
||||||
|
'thumbnail': self._proto_relative_url(
|
||||||
|
info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
||||||
|
'http:'),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
@ -92,29 +92,30 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
def _vimeo_sort_formats(self, formats):
|
def _vimeo_sort_formats(self, formats):
|
||||||
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
|
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
|
||||||
# at the same time without actual units specified. This lead to wrong sorting.
|
# at the same time without actual units specified. This lead to wrong sorting.
|
||||||
self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
|
self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
|
||||||
|
|
||||||
def _parse_config(self, config, video_id):
|
def _parse_config(self, config, video_id):
|
||||||
|
video_data = config['video']
|
||||||
# Extract title
|
# Extract title
|
||||||
video_title = config['video']['title']
|
video_title = video_data['title']
|
||||||
|
|
||||||
# Extract uploader, uploader_url and uploader_id
|
# Extract uploader, uploader_url and uploader_id
|
||||||
video_uploader = config['video'].get('owner', {}).get('name')
|
video_uploader = video_data.get('owner', {}).get('name')
|
||||||
video_uploader_url = config['video'].get('owner', {}).get('url')
|
video_uploader_url = video_data.get('owner', {}).get('url')
|
||||||
video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
|
video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
|
||||||
|
|
||||||
# Extract video thumbnail
|
# Extract video thumbnail
|
||||||
video_thumbnail = config['video'].get('thumbnail')
|
video_thumbnail = video_data.get('thumbnail')
|
||||||
if video_thumbnail is None:
|
if video_thumbnail is None:
|
||||||
video_thumbs = config['video'].get('thumbs')
|
video_thumbs = video_data.get('thumbs')
|
||||||
if video_thumbs and isinstance(video_thumbs, dict):
|
if video_thumbs and isinstance(video_thumbs, dict):
|
||||||
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||||
|
|
||||||
# Extract video duration
|
# Extract video duration
|
||||||
video_duration = int_or_none(config['video'].get('duration'))
|
video_duration = int_or_none(video_data.get('duration'))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
config_files = config['video'].get('files') or config['request'].get('files', {})
|
config_files = video_data.get('files') or config['request'].get('files', {})
|
||||||
for f in config_files.get('progressive', []):
|
for f in config_files.get('progressive', []):
|
||||||
video_url = f.get('url')
|
video_url = f.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
@ -127,10 +128,24 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
'fps': int_or_none(f.get('fps')),
|
'fps': int_or_none(f.get('fps')),
|
||||||
'tbr': int_or_none(f.get('bitrate')),
|
'tbr': int_or_none(f.get('bitrate')),
|
||||||
})
|
})
|
||||||
m3u8_url = config_files.get('hls', {}).get('url')
|
|
||||||
if m3u8_url:
|
for files_type in ('hls', 'dash'):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
manifest_url = cdn_data.get('url')
|
||||||
|
if not manifest_url:
|
||||||
|
continue
|
||||||
|
format_id = '%s-%s' % (files_type, cdn_name)
|
||||||
|
if files_type == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
manifest_url, video_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id=format_id,
|
||||||
|
note='Downloading %s m3u8 information' % cdn_name,
|
||||||
|
fatal=False))
|
||||||
|
elif files_type == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id,
|
||||||
|
'Downloading %s MPD information' % cdn_name,
|
||||||
|
fatal=False))
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
text_tracks = config['request'].get('text_tracks')
|
text_tracks = config['request'].get('text_tracks')
|
||||||
|
249
youtube_dl/extractor/viu.py
Normal file
249
youtube_dl/extractor/viu.py
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ViuBaseIE(InfoExtractor):
|
||||||
|
def _real_initialize(self):
|
||||||
|
viu_auth_res = self._request_webpage(
|
||||||
|
'https://www.viu.com/api/apps/v2/authenticate', None,
|
||||||
|
'Requesting Viu auth', query={
|
||||||
|
'acct': 'test',
|
||||||
|
'appid': 'viu_desktop',
|
||||||
|
'fmt': 'json',
|
||||||
|
'iid': 'guest',
|
||||||
|
'languageid': 'default',
|
||||||
|
'platform': 'desktop',
|
||||||
|
'userid': 'guest',
|
||||||
|
'useridtype': 'guest',
|
||||||
|
'ver': '1.0'
|
||||||
|
}, headers=self.geo_verification_headers())
|
||||||
|
self._auth_token = viu_auth_res.info()['X-VIU-AUTH']
|
||||||
|
|
||||||
|
def _call_api(self, path, *args, **kwargs):
|
||||||
|
headers = self.geo_verification_headers()
|
||||||
|
headers.update({
|
||||||
|
'X-VIU-AUTH': self._auth_token
|
||||||
|
})
|
||||||
|
headers.update(kwargs.get('headers', {}))
|
||||||
|
kwargs['headers'] = headers
|
||||||
|
response = self._download_json(
|
||||||
|
'https://www.viu.com/api/' + path, *args, **kwargs)['response']
|
||||||
|
if response.get('status') != 'success':
|
||||||
|
raise ExtractorError('%s said: %s' % (
|
||||||
|
self.IE_NAME, response['message']), expected=True)
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
class ViuIE(ViuBaseIE):
|
||||||
|
_VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1116705532',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Citizen Khan - Ep 1',
|
||||||
|
'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8 download',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to India',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.viu.com/en/media/1130599965',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1130599965',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Jealousy Incarnate - Episode 1',
|
||||||
|
'description': 'md5:d3d82375cab969415d2720b6894361e9',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8 download',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to Indonesia',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video_data = self._call_api(
|
||||||
|
'clip/load', video_id, 'Downloading video data', query={
|
||||||
|
'appid': 'viu_desktop',
|
||||||
|
'fmt': 'json',
|
||||||
|
'id': video_id
|
||||||
|
})['item'][0]
|
||||||
|
|
||||||
|
title = video_data['title']
|
||||||
|
|
||||||
|
m3u8_url = None
|
||||||
|
url_path = video_data.get('urlpathd') or video_data.get('urlpath')
|
||||||
|
tdirforwhole = video_data.get('tdirforwhole')
|
||||||
|
# #EXT-X-BYTERANGE is not supported by native hls downloader
|
||||||
|
# and ffmpeg (#10955)
|
||||||
|
# hls_file = video_data.get('hlsfile')
|
||||||
|
hls_file = video_data.get('jwhlsfile')
|
||||||
|
if url_path and tdirforwhole and hls_file:
|
||||||
|
m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
|
||||||
|
else:
|
||||||
|
# m3u8_url = re.sub(
|
||||||
|
# r'(/hlsc_)[a-z]+(\d+\.m3u8)',
|
||||||
|
# r'\1whe\2', video_data['href'])
|
||||||
|
m3u8_url = video_data['href']
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for key, value in video_data.items():
|
||||||
|
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(mobj.group('lang'), []).append({
|
||||||
|
'url': value,
|
||||||
|
'ext': mobj.group('ext')
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'series': video_data.get('moviealbumshowname'),
|
||||||
|
'episode': title,
|
||||||
|
'episode_number': int_or_none(video_data.get('episodeno')),
|
||||||
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ViuPlaylistIE(ViuBaseIE):
|
||||||
|
IE_NAME = 'viu:playlist'
|
||||||
|
_VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.viu.com/en/listing/playlist-22461380',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '22461380',
|
||||||
|
'title': 'The Good Wife',
|
||||||
|
},
|
||||||
|
'playlist_count': 16,
|
||||||
|
'skip': 'Geo-restricted to Indonesia',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
playlist_data = self._call_api(
|
||||||
|
'container/load', playlist_id,
|
||||||
|
'Downloading playlist info', query={
|
||||||
|
'appid': 'viu_desktop',
|
||||||
|
'fmt': 'json',
|
||||||
|
'id': 'playlist-' + playlist_id
|
||||||
|
})['container']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for item in playlist_data.get('item', []):
|
||||||
|
item_id = item.get('id')
|
||||||
|
if not item_id:
|
||||||
|
continue
|
||||||
|
item_id = compat_str(item_id)
|
||||||
|
entries.append(self.url_result(
|
||||||
|
'viu:' + item_id, 'Viu', item_id))
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_data.get('title'))
|
||||||
|
|
||||||
|
|
||||||
|
class ViuOTTIE(InfoExtractor):
|
||||||
|
IE_NAME = 'viu:ott'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/[a-z]{2}-[a-z]{2}/vod/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3421',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'A New Beginning',
|
||||||
|
'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8 download',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to Singapore',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7123',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '這就是我的生活之道',
|
||||||
|
'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8 download',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to Hong Kong',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
country_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
|
product_data = self._download_json(
|
||||||
|
'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
|
||||||
|
'Downloading video info', query={
|
||||||
|
'r': 'vod/ajax-detail',
|
||||||
|
'platform_flag_label': 'web',
|
||||||
|
'product_id': video_id,
|
||||||
|
})['data']
|
||||||
|
|
||||||
|
video_data = product_data.get('current_product')
|
||||||
|
if not video_data:
|
||||||
|
raise ExtractorError('This video is not available in your region.', expected=True)
|
||||||
|
|
||||||
|
stream_data = self._download_json(
|
||||||
|
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||||
|
video_id, 'Downloading stream info', query={
|
||||||
|
'ccs_product_id': video_data['ccs_product_id'],
|
||||||
|
})['data']['stream']
|
||||||
|
|
||||||
|
stream_sizes = stream_data.get('size', {})
|
||||||
|
formats = []
|
||||||
|
for vid_format, stream_url in stream_data.get('url', {}).items():
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r's(\d+)p', vid_format, 'height', default=None))
|
||||||
|
formats.append({
|
||||||
|
'format_id': vid_format,
|
||||||
|
'url': stream_url,
|
||||||
|
'height': height,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'filesize': int_or_none(stream_sizes.get(vid_format))
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for sub in video_data.get('subtitle', []):
|
||||||
|
sub_url = sub.get('url')
|
||||||
|
if not sub_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(sub.get('name'), []).append({
|
||||||
|
'url': sub_url,
|
||||||
|
'ext': 'srt',
|
||||||
|
})
|
||||||
|
|
||||||
|
title = video_data['synopsis'].strip()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'series': product_data.get('series', {}).get('name'),
|
||||||
|
'episode': title,
|
||||||
|
'episode_number': int_or_none(video_data.get('number')),
|
||||||
|
'duration': int_or_none(stream_data.get('duration')),
|
||||||
|
'thumbnail': video_data.get('cover_image_url'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
remove_start,
|
remove_start,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import compat_urllib_parse_urlencode
|
||||||
|
|
||||||
@ -48,17 +49,23 @@ class VLiveIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||||
|
|
||||||
video_params = self._search_regex(
|
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
|
||||||
r'\bvlive\.video\.init\(([^)]+)\)',
|
VIDEO_PARAMS_FIELD = 'video params'
|
||||||
webpage, 'video params')
|
|
||||||
status, _, _, live_params, long_video_id, key = re.split(
|
params = self._parse_json(self._search_regex(
|
||||||
r'"\s*,\s*"', video_params)[2:8]
|
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
|
||||||
|
transform_source=lambda s: '[' + s + ']', fatal=False)
|
||||||
|
|
||||||
|
if not params or len(params) < 7:
|
||||||
|
params = self._search_regex(
|
||||||
|
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
|
||||||
|
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
|
||||||
|
|
||||||
|
status, long_video_id, key = params[2], params[5], params[6]
|
||||||
status = remove_start(status, 'PRODUCT_')
|
status = remove_start(status, 'PRODUCT_')
|
||||||
|
|
||||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||||
live_params = self._parse_json('"%s"' % live_params, video_id)
|
return self._live(video_id, webpage)
|
||||||
live_params = self._parse_json(live_params, video_id)
|
|
||||||
return self._live(video_id, webpage, live_params)
|
|
||||||
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
||||||
if long_video_id and key:
|
if long_video_id and key:
|
||||||
return self._replay(video_id, webpage, long_video_id, key)
|
return self._replay(video_id, webpage, long_video_id, key)
|
||||||
@ -89,7 +96,22 @@ class VLiveIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _live(self, video_id, webpage, live_params):
|
def _live(self, video_id, webpage):
|
||||||
|
init_page = self._download_webpage(
|
||||||
|
'http://www.vlive.tv/video/init/view',
|
||||||
|
video_id, note='Downloading live webpage',
|
||||||
|
data=urlencode_postdata({'videoSeq': video_id}),
|
||||||
|
headers={
|
||||||
|
'Referer': 'http://www.vlive.tv/video/%s' % video_id,
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
|
})
|
||||||
|
|
||||||
|
live_params = self._search_regex(
|
||||||
|
r'"liveStreamInfo"\s*:\s*(".*"),',
|
||||||
|
init_page, 'live stream info')
|
||||||
|
live_params = self._parse_json(live_params, video_id)
|
||||||
|
live_params = self._parse_json(live_params, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for vid in live_params.get('resolutions', []):
|
for vid in live_params.get('resolutions', []):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
@ -98,10 +120,14 @@ class VLiveIE(InfoExtractor):
|
|||||||
fatal=False, live=True))
|
fatal=False, live=True))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return dict(self._get_common_fields(webpage),
|
info = self._get_common_fields(webpage)
|
||||||
id=video_id,
|
info.update({
|
||||||
formats=formats,
|
'title': self._live_title(info['title']),
|
||||||
is_live=True)
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': True,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
def _replay(self, video_id, webpage, long_video_id, key):
|
def _replay(self, video_id, webpage, long_video_id, key):
|
||||||
playinfo = self._download_json(
|
playinfo = self._download_json(
|
||||||
@ -135,8 +161,11 @@ class VLiveIE(InfoExtractor):
|
|||||||
'ext': 'vtt',
|
'ext': 'vtt',
|
||||||
'url': caption['source']}]
|
'url': caption['source']}]
|
||||||
|
|
||||||
return dict(self._get_common_fields(webpage),
|
info = self._get_common_fields(webpage)
|
||||||
id=video_id,
|
info.update({
|
||||||
formats=formats,
|
'id': video_id,
|
||||||
view_count=view_count,
|
'formats': formats,
|
||||||
subtitles=subtitles)
|
'view_count': view_count,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
@ -7,6 +7,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -66,10 +67,9 @@ class VpornIE(InfoExtractor):
|
|||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'class="(?:descr|description_txt)">(.*?)</div>',
|
r'class="(?:descr|description_txt)">(.*?)</div>',
|
||||||
webpage, 'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
|
||||||
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
|
r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
|
||||||
if thumbnail:
|
default=None))
|
||||||
thumbnail = 'http://www.vporn.com' + thumbnail
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
|
r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
|
||||||
|
140
youtube_dl/extractor/vvvvid.py
Normal file
140
youtube_dl/extractor/vvvvid.py
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VVVVIDIE(InfoExtractor):
    """Extractor for single episodes hosted on vvvvid.it.

    The episode metadata is fetched from the site's season listing endpoint
    and the obfuscated ``embed_info`` values are decoded locally (see ``ds``
    inside ``_real_extract``) to obtain the stream locations.
    """

    _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
    _TESTS = [{
        # video_type == 'video/vvvvid'
        'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
        'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
        'info_dict': {
            'id': '489048',
            'ext': 'mp4',
            'title': 'Ping Pong',
        },
    }, {
        # video_type == 'video/rcs'
        'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
        'md5': '33e0edfba720ad73a8782157fdebc648',
        'info_dict': {
            'id': '482493',
            'ext': 'mp4',
            'title': 'Episodio 01',
        },
    }]
    # Connection id returned by the login endpoint; sent with every season request.
    _conn_id = None

    def _real_initialize(self):
        """Fetch and remember the ``conn_id`` required by the ondemand API."""
        self._conn_id = self._download_json(
            'https://www.vvvvid.it/user/login',
            None, headers=self.geo_verification_headers())['data']['conn_id']

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by *url*.

        Raises ExtractorError when the API reports an error or when the
        requested video id is not present in the season listing.
        """
        show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
        response = self._download_json(
            'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
            video_id, headers=self.geo_verification_headers(), query={
                'conn_id': self._conn_id,
            })
        if response['result'] == 'error':
            raise ExtractorError('%s said: %s' % (
                self.IE_NAME, response['message']), expected=True)

        # The season listing contains every episode; pick the requested one.
        vid = int(video_id)
        video_data = next(
            (episode for episode in response['data']
             if episode.get('video_id') == vid), None)
        if video_data is None:
            # Fail with a descriptive extractor error instead of a bare IndexError.
            raise ExtractorError(
                'Unable to find video %s in season %s' % (video_id, season_id))
        formats = []

        # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
        def ds(h):
            # Custom 64-character alphabet; a character's position is its 6-bit value.
            g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"

            def f(m):
                # Repack the list of 6-bit values in m into 8-bit values:
                # every group of up to four inputs yields up to three outputs
                # (n, k, j). k and j stay at -1 when the input runs out.
                l = []
                o = 0
                b = False
                m_len = len(m)
                while ((not b) and o < m_len):
                    n = m[o] << 2
                    o += 1
                    k = -1
                    j = -1
                    if o < m_len:
                        n += m[o] >> 4
                        o += 1
                        if o < m_len:
                            k = (m[o - 1] << 4) & 255
                            k += m[o] >> 2
                            o += 1
                            if o < m_len:
                                j = (m[o - 1] << 6) & 255
                                j += m[o]
                                o += 1
                            else:
                                b = True
                        else:
                            b = True
                    else:
                        b = True
                    l.append(n)
                    if k != -1:
                        l.append(k)
                    if j != -1:
                        l.append(j)
                return l

            # Map every input character to its index in the alphabet.
            c = [g.index(e) for e in h]

            # Two backwards passes XOR-ing each value with its (cyclic) successor.
            c_len = len(c)
            for e in range(c_len * 2 - 1, -1, -1):
                a = c[e % c_len] ^ c[(e + 1) % c_len]
                c[e % c_len] = a

            return ''.join(chr(e) for e in f(c))

        # Both the SD ('embed_info_sd') and the default ('embed_info') variants are tried.
        for quality in ('_sd', ''):
            embed_code = video_data.get('embed_info' + quality)
            if not embed_code:
                continue
            embed_code = ds(embed_code)
            video_type = video_data.get('video_type')
            if video_type in ('video/rcs', 'video/kenc'):
                formats.extend(self._extract_akamai_formats(
                    embed_code, video_id))
            else:
                formats.extend(self._extract_wowza_formats(
                    'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['title'],
            'formats': formats,
            'thumbnail': video_data.get('thumbnail'),
            'duration': int_or_none(video_data.get('length')),
            'series': video_data.get('show_title'),
            'season_id': season_id,
            'season_number': video_data.get('season_number'),
            'episode_id': str_or_none(video_data.get('id')),
            # Fixed key: was misspelled 'epidode_number'.
            'episode_number': int_or_none(video_data.get('number')),
            'episode_title': video_data['title'],
            'view_count': int_or_none(video_data.get('views')),
            'like_count': int_or_none(video_data.get('video_likes')),
        }
|
@ -86,6 +86,11 @@ std_headers = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
USER_AGENTS = {
|
||||||
|
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
NO_DEFAULT = object()
|
NO_DEFAULT = object()
|
||||||
|
|
||||||
ENGLISH_MONTH_NAMES = [
|
ENGLISH_MONTH_NAMES = [
|
||||||
@ -1695,6 +1700,16 @@ def base_url(url):
|
|||||||
return re.match(r'https?://[^?#&]+/', url).group()
|
return re.match(r'https?://[^?#&]+/', url).group()
|
||||||
|
|
||||||
|
|
||||||
|
def urljoin(base, path):
    """Join *path* onto *base*, returning None when either is unusable.

    - Returns None if *path* is not a non-empty ``compat_str``.
    - Returns *path* unchanged if it already starts with ``http://``,
      ``https://`` or ``//``.
    - Returns None if *base* is not a ``compat_str`` starting with one of
      those prefixes.
    - Otherwise defers to ``compat_urlparse.urljoin``.
    """
    absolute_url_re = r'^(?:https?:)?//'

    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(absolute_url_re, path):
        # Already absolute (or protocol-relative): nothing to join.
        return path
    if not (isinstance(base, compat_str) and re.match(absolute_url_re, base)):
        return None
    return compat_urlparse.urljoin(base, path)
|
||||||
|
|
||||||
|
|
||||||
class HEADRequest(compat_urllib_request.Request):
|
class HEADRequest(compat_urllib_request.Request):
|
||||||
def get_method(self):
|
def get_method(self):
|
||||||
return 'HEAD'
|
return 'HEAD'
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.12.01'
|
__version__ = '2016.12.22'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user