commit
cb3322eb89
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.05**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.01.05
|
[debug] youtube-dl version 2017.01.22
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -124,7 +124,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
'id': '42',
|
'id': '42',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video title goes here',
|
'title': 'Video title goes here',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
# TODO more properties, either as:
|
# TODO more properties, either as:
|
||||||
# * A value
|
# * A value
|
||||||
# * MD5 checksum; start the string with md5:
|
# * MD5 checksum; start the string with md5:
|
||||||
|
85
ChangeLog
85
ChangeLog
@ -1,9 +1,92 @@
|
|||||||
version <unreleased>
|
version 2017.01.22
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [pornflip] Add support for pornflip.com (#11556, #11795)
|
||||||
|
* [chaturbate] Fix extraction (#11797, #11802)
|
||||||
|
+ [azmedien] Add support for AZ Medien sites (#11784, #11785)
|
||||||
|
+ [nextmedia] Support redirected URLs
|
||||||
|
+ [vimeo:channel] Extract videos' titles for playlist entries (#11796)
|
||||||
|
+ [youtube] Extract episode metadata (#9695, #11774)
|
||||||
|
+ [cspan] Support Ustream embedded videos (#11547)
|
||||||
|
+ [1tv] Add support for HLS videos (#11786)
|
||||||
|
* [uol] Fix extraction (#11770)
|
||||||
|
* [mtv] Relax triforce feed regular expression (#11766)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.01.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [bilibili] Fix extraction (#11077)
|
||||||
|
+ [canalplus] Add fallback for video id (#11764)
|
||||||
|
* [20min] Fix extraction (#11683, #11751)
|
||||||
|
* [imdb] Extend URL regular expression (#11744)
|
||||||
|
+ [naver] Add support for tv.naver.com links (#11743)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.01.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [options] Apply custom config to final composite configuration (#11741)
|
||||||
|
* [YoutubeDL] Improve protocol auto determining (#11720)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [xiami] Relax URL regular expressions
|
||||||
|
* [xiami] Improve track metadata extraction (#11699)
|
||||||
|
+ [limelight] Check hand-made direct HTTP links
|
||||||
|
+ [limelight] Add support for direct HTTP links at video.llnw.net (#11737)
|
||||||
|
+ [brightcove] Recognize another player ID pattern (#11688)
|
||||||
|
+ [niconico] Support login via cookies (#7968)
|
||||||
|
* [yourupload] Fix extraction (#11601)
|
||||||
|
+ [beam:live] Add support for beam.pro live streams (#10702, #11596)
|
||||||
|
* [vevo] Improve geo restriction detection
|
||||||
|
+ [dramafever] Add support for URLs with language code (#11714)
|
||||||
|
* [cbc] Improve playlist support (#11704)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.01.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [common] Add ability to customize akamai manifest host
|
||||||
|
+ [utils] Add more date formats
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
- [mtv] Eliminate _transform_rtmp_url
|
||||||
|
* [mtv] Generalize triforce mgid extraction
|
||||||
|
+ [cmt] Add support for full episodes and video clips (#11623)
|
||||||
|
+ [mitele] Extract DASH formats
|
||||||
|
+ [ooyala] Add support for videos with embedToken (#11684)
|
||||||
|
* [mixcloud] Fix extraction (#11674)
|
||||||
|
* [openload] Fix extraction (#10408)
|
||||||
|
* [tv4] Improve extraction (#11698)
|
||||||
|
* [freesound] Fix and improve extraction (#11602)
|
||||||
|
+ [nick] Add support for beta.nick.com (#11655)
|
||||||
|
* [mtv,cc] Use HLS by default with native HLS downloader (#11641)
|
||||||
|
* [mtv] Fix non-HLS extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.01.10
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix extraction (#11663, #11664)
|
||||||
|
+ [inc] Add support for inc.com (#11277, #11647)
|
||||||
|
+ [youtube] Add itag 212 (#11575)
|
||||||
|
+ [egghead:course] Add support for egghead.io courses
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.01.08
|
||||||
|
|
||||||
Core
|
Core
|
||||||
* Fix "invalid escape sequence" errors under Python 3.6 (#11581)
|
* Fix "invalid escape sequence" errors under Python 3.6 (#11581)
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
+ [hitrecord] Add support for hitrecord.org (#10867, #11626)
|
||||||
|
- [videott] Remove extractor
|
||||||
|
* [swrmediathek] Improve extraction
|
||||||
|
- [sharesix] Remove extractor
|
||||||
|
- [aol:features] Remove extractor
|
||||||
|
* [sendtonews] Improve info extraction
|
||||||
|
* [3sat,phoenix] Fix extraction (#11619)
|
||||||
|
* [comedycentral/mtv] Add support for HLS videos (#11600)
|
||||||
* [discoverygo] Fix JSON data parsing (#11219, #11522)
|
* [discoverygo] Fix JSON data parsing (#11219, #11522)
|
||||||
|
|
||||||
|
|
||||||
|
@ -374,7 +374,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
avprobe)
|
avprobe)
|
||||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||||
"vorbis", "mp3", "m4a", "opus", or "wav";
|
"vorbis", "mp3", "m4a", "opus", or "wav";
|
||||||
"best" by default
|
"best" by default; No effect without -x
|
||||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
||||||
a value between 0 (better) and 9 (worse)
|
a value between 0 (better) and 9 (worse)
|
||||||
for VBR or a specific bitrate like 128K
|
for VBR or a specific bitrate like 128K
|
||||||
@ -841,7 +841,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
|||||||
|
|
||||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||||
|
|
||||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||||
|
|
||||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||||
|
|
||||||
|
@ -74,6 +74,8 @@
|
|||||||
- **awaan:live**
|
- **awaan:live**
|
||||||
- **awaan:season**
|
- **awaan:season**
|
||||||
- **awaan:video**
|
- **awaan:video**
|
||||||
|
- **AZMedien**: AZ Medien videos
|
||||||
|
- **AZMedienShow**: AZ Medien shows
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
- **AzubuLive**
|
- **AzubuLive**
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
@ -86,6 +88,7 @@
|
|||||||
- **bbc.co.uk:article**: BBC articles
|
- **bbc.co.uk:article**: BBC articles
|
||||||
- **bbc.co.uk:iplayer:playlist**
|
- **bbc.co.uk:iplayer:playlist**
|
||||||
- **bbc.co.uk:playlist**
|
- **bbc.co.uk:playlist**
|
||||||
|
- **Beam:live**
|
||||||
- **Beatport**
|
- **Beatport**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
@ -214,6 +217,7 @@
|
|||||||
- **EaglePlatform**
|
- **EaglePlatform**
|
||||||
- **EbaumsWorld**
|
- **EbaumsWorld**
|
||||||
- **EchoMsk**
|
- **EchoMsk**
|
||||||
|
- **egghead:course**: egghead.io course
|
||||||
- **eHow**
|
- **eHow**
|
||||||
- **Einthusan**
|
- **Einthusan**
|
||||||
- **eitb.tv**
|
- **eitb.tv**
|
||||||
@ -240,7 +244,6 @@
|
|||||||
- **fc2**
|
- **fc2**
|
||||||
- **fc2:embed**
|
- **fc2:embed**
|
||||||
- **Fczenit**
|
- **Fczenit**
|
||||||
- **features.aol.com**
|
|
||||||
- **fernsehkritik.tv**
|
- **fernsehkritik.tv**
|
||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
- **FiveTV**
|
- **FiveTV**
|
||||||
@ -304,6 +307,7 @@
|
|||||||
- **history:topic**: History.com Topic
|
- **history:topic**: History.com Topic
|
||||||
- **hitbox**
|
- **hitbox**
|
||||||
- **hitbox:live**
|
- **hitbox:live**
|
||||||
|
- **HitRecord**
|
||||||
- **HornBunny**
|
- **HornBunny**
|
||||||
- **HotNewHipHop**
|
- **HotNewHipHop**
|
||||||
- **HotStar**
|
- **HotStar**
|
||||||
@ -321,6 +325,7 @@
|
|||||||
- **Imgur**
|
- **Imgur**
|
||||||
- **ImgurAlbum**
|
- **ImgurAlbum**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
|
- **Inc**
|
||||||
- **Indavideo**
|
- **Indavideo**
|
||||||
- **IndavideoEmbed**
|
- **IndavideoEmbed**
|
||||||
- **InfoQ**
|
- **InfoQ**
|
||||||
@ -569,6 +574,7 @@
|
|||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
- **PolskieRadioCategory**
|
- **PolskieRadioCategory**
|
||||||
- **PornCom**
|
- **PornCom**
|
||||||
|
- **PornFlip**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**: PornHub and Thumbzilla
|
- **PornHub**: PornHub and Thumbzilla
|
||||||
- **PornHubPlaylist**
|
- **PornHubPlaylist**
|
||||||
@ -650,7 +656,6 @@
|
|||||||
- **screen.yahoo:search**: Yahoo screen search
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
- **Screencast**
|
- **Screencast**
|
||||||
- **ScreencastOMatic**
|
- **ScreencastOMatic**
|
||||||
- **ScreenJunkies**
|
|
||||||
- **Seeker**
|
- **Seeker**
|
||||||
- **SenateISVP**
|
- **SenateISVP**
|
||||||
- **SendtoNews**
|
- **SendtoNews**
|
||||||
@ -658,7 +663,6 @@
|
|||||||
- **Sexu**
|
- **Sexu**
|
||||||
- **Shahid**
|
- **Shahid**
|
||||||
- **Shared**: shared.sx
|
- **Shared**: shared.sx
|
||||||
- **ShareSix**
|
|
||||||
- **ShowRoomLive**
|
- **ShowRoomLive**
|
||||||
- **Sina**
|
- **Sina**
|
||||||
- **SixPlay**
|
- **SixPlay**
|
||||||
@ -845,7 +849,6 @@
|
|||||||
- **videomore:season**
|
- **videomore:season**
|
||||||
- **videomore:video**
|
- **videomore:video**
|
||||||
- **VideoPremium**
|
- **VideoPremium**
|
||||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
|
||||||
- **videoweed**: VideoWeed
|
- **videoweed**: VideoWeed
|
||||||
- **Vidio**
|
- **Vidio**
|
||||||
- **vidme**
|
- **vidme**
|
||||||
|
@ -295,6 +295,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||||
self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
|
self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
|
||||||
|
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
|
||||||
|
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
|
||||||
|
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
|
||||||
|
|
||||||
def test_unified_timestamps(self):
|
def test_unified_timestamps(self):
|
||||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||||
|
@ -1363,7 +1363,7 @@ class YoutubeDL(object):
|
|||||||
format['ext'] = determine_ext(format['url']).lower()
|
format['ext'] = determine_ext(format['url']).lower()
|
||||||
# Automatically determine protocol if missing (useful for format
|
# Automatically determine protocol if missing (useful for format
|
||||||
# selection purposes)
|
# selection purposes)
|
||||||
if 'protocol' not in format:
|
if format.get('protocol') is None:
|
||||||
format['protocol'] = determine_protocol(format)
|
format['protocol'] = determine_protocol(format)
|
||||||
# Add HTTP headers, so that external programs can use them from the
|
# Add HTTP headers, so that external programs can use them from the
|
||||||
# json output
|
# json output
|
||||||
|
145
youtube_dl/extractor/azmedien.py
Normal file
145
youtube_dl/extractor/azmedien.py
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .kaltura import KalturaIE
|
||||||
|
from ..utils import (
|
||||||
|
get_element_by_class,
|
||||||
|
strip_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AZMedienBaseIE(InfoExtractor):
    """Common base of the AZ Medien extractors; playback is handed to Kaltura."""

    def _kaltura_video(self, partner_id, entry_id):
        """Return a url_result that points KalturaIE at partner_id/entry_id."""
        kaltura_url = 'kaltura:%s:%s' % (partner_id, entry_id)
        return self.url_result(
            kaltura_url, ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||||
|
|
||||||
|
|
||||||
|
class AZMedienIE(AZMedienBaseIE):
    """Extractor for a single video on telezueri.ch, telebaern.tv or telem1.ch."""

    IE_DESC = 'AZ Medien videos'
    # The id group is whatever follows the last "-segment-" prefix or the
    # "#" fragment marker, i.e. the slug of the selected video.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        [0-9]+-show-[^/\#]+
                        (?:
                            /[0-9]+-episode-[^/\#]+
                            (?:
                                /[0-9]+-segment-(?:[^/\#]+\#)?|
                                \#
                            )|
                            \#
                        )
                        (?P<id>[^\#]+)
                    '''

    _TESTS = [{
        # URL with 'segment'
        'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
        'info_dict': {
            'id': '1_2444peh4',
            'ext': 'mov',
            'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
            'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
            'uploader_id': 'TeleZ?ri',
            'upload_date': '20161218',
            'timestamp': 1482084490,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # URL with 'segment' and fragment:
        'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
        'only_matching': True
    }, {
        # URL with 'episode' and fragment:
        'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
        'only_matching': True
    }, {
        # URL with 'show' and fragment:
        'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Resolve the page's Kaltura partner and entry ids and delegate to Kaltura."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The partner id is read from the Kaltura <script> embed URL.
        kaltura_partner = self._search_regex(
            r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
            page, 'kaltura partner id')

        # The entry id is the data-id of the anchor whose data-slug starts
        # with the slug taken from the URL.
        entry_pattern = (
            r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
            % re.escape(slug))
        kaltura_entry = self._html_search_regex(
            entry_pattern, page, 'kaltura entry id', group='id')

        return self._kaltura_video(kaltura_partner, kaltura_entry)
|
||||||
|
|
||||||
|
|
||||||
|
class AZMedienShowIE(AZMedienBaseIE):
    """Playlist extractor for show and episode pages of the AZ Medien sites."""

    IE_DESC = 'AZ Medien shows'
    # Anchored with "$": only URLs that end at the show or episode path
    # component match, so segment/fragment URLs are not matched here.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        (?P<id>[0-9]+-show-[^/\#]+
                            (?:
                                /[0-9]+-episode-[^/\#]+
                            )?
                        )$
                    '''

    _TESTS = [{
        # URL with 'episode'
        'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
        'info_dict': {
            'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
            'title': 'News - Donnerstag, 15. Dezember 2016',
        },
        'playlist_count': 9,
    }, {
        # URL with 'show' only
        'url': 'http://www.telezueri.ch/86-show-talktaeglich',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Build a playlist from the page's Kaltura entries or, failing that, its video links."""
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        entries = []

        # The subdomain is optional, consistent with the partner id pattern
        # used by AZMedienIE; previously a bare "kaltura.com" host was
        # rejected because the "(?:[^/]+\.)" group lacked its "?" quantifier.
        partner_id = self._search_regex(
            r'src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
            webpage, 'kaltura partner id', default=None)

        if partner_id:
            # Every data-id attribute on the page is treated as a Kaltura entry id.
            entries = [
                self._kaltura_video(partner_id, m.group('id'))
                for m in re.finditer(
                    r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]

        if not entries:
            # Fallback: hand each linked video page to AZMedienIE.
            entries = [
                self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
                for m in re.finditer(
                    r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]

        # Prefer the episodeShareTitle assignment found in the page; otherwise
        # use the stripped text of the "title-block-cell" element.
        title = self._search_regex(
            r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
            webpage, 'title',
            default=strip_or_none(get_element_by_class(
                'title-block-cell', webpage)), group='title')

        return self.playlist_result(entries, show_id, title)
|
73
youtube_dl/extractor/beampro.py
Normal file
73
youtube_dl/extractor/beampro.py
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
|
compat_str,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BeamProLiveIE(InfoExtractor):
    """Extractor for live channel streams on beam.pro."""

    IE_NAME = 'Beam:live'
    _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
    # Maps the channel's "audience" value to an age limit.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
    _TEST = {
        'url': 'http://www.beam.pro/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel JSON and return the info dict for its HLS live stream."""
        channel_name = self._match_id(url)

        info = self._download_json(
            'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)

        # Only an explicit False counts as offline; a missing flag falls through.
        if info.get('online') is False:
            offline_message = '{0} is offline'.format(channel_name)
            raise ExtractorError(offline_message, expected=True)

        manifest_url = (
            'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % info['id'])
        formats = self._extract_m3u8_formats(
            manifest_url, channel_name,
            ext='mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        # The owner id is taken from "userId" or, failing that, the nested user object.
        owner_id = info.get('userId') or try_get(info, lambda x: x['user']['id'])
        uploader_id = compat_str(owner_id) if owner_id else None

        return {
            'id': compat_str(info.get('id') or channel_name),
            'title': self._live_title(info.get('name') or channel_name),
            'description': clean_html(info.get('description')),
            'thumbnail': try_get(info, lambda x: x['thumbnail']['url'], compat_str),
            'timestamp': parse_iso8601(info.get('updatedAt')),
            'uploader': info.get('token') or try_get(
                info, lambda x: x['user']['username'], compat_str),
            'uploader_id': uploader_id,
            'age_limit': self._RATINGS.get(info.get('audience')),
            'is_live': True,
            'view_count': int_or_none(info.get('viewersTotal')),
            'formats': formats,
        }
|
@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
_APP_KEY = '6f90a59ac58a4123'
|
_APP_KEY = '84956560bc028eb7'
|
||||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
|
|
||||||
params = {}
|
params = {}
|
||||||
|
|
||||||
playerID = find_param('playerID')
|
playerID = find_param('playerID') or find_param('playerId')
|
||||||
if playerID is None:
|
if playerID is None:
|
||||||
raise ExtractorError('Cannot find player ID')
|
raise ExtractorError('Cannot find player ID')
|
||||||
params['playerID'] = playerID
|
params['playerID'] = playerID
|
||||||
@ -204,7 +204,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
# // build Brightcove <object /> XML
|
# // build Brightcove <object /> XML
|
||||||
# }
|
# }
|
||||||
m = re.search(
|
m = re.search(
|
||||||
r'''(?x)customBC.\createVideo\(
|
r'''(?x)customBC\.createVideo\(
|
||||||
.*? # skipping width and height
|
.*? # skipping width and height
|
||||||
["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
|
["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
|
||||||
["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
|
["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
|
||||||
|
@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||||
r'id=["\']canal_video_player(?P<id>\d+)',
|
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||||
r'data-video=["\'](?P<id>\d+)'],
|
r'data-video=["\'](?P<id>\d+)'],
|
||||||
webpage, 'video id', group='id')
|
webpage, 'video id', default=mobj.group('vid'), group='id')
|
||||||
|
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||||
|
@ -90,36 +90,49 @@ class CBCIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'skip': 'Geo-restricted to Canada',
|
'skip': 'Geo-restricted to Canada',
|
||||||
|
}, {
|
||||||
|
# multiple CBC.APP.Caffeine.initInstance(...)
|
||||||
|
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||||
|
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 6,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _extract_player_init(self, player_init, display_id):
    """Turn one CBC.APP.Caffeine.initInstance(...) argument into a CBCPlayer url_result.

    player_init is the JavaScript object literal passed to initInstance;
    display_id is only used as the id when parsing it.
    """
    init_data = self._parse_json(player_init, display_id, js_to_json)

    media_id = init_data.get('mediaId')
    if not media_id:
        # No media id in the player data: resolve it from the clip id,
        # trying the tpfeed.cbc.ca feed first (non-fatal).
        clip_id = init_data['clipId']
        tp_feed = self._download_json(
            'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
            clip_id, fatal=False)
        if tp_feed:
            media_id = try_get(
                tp_feed, lambda x: x['entries'][0]['guid'], compat_str)
        if not media_id:
            # Last resort: theplatform feed; the media id is the final
            # path component of the first entry's id.
            platform_feed = self._download_json(
                'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                clip_id)
            media_id = platform_feed['entries'][0]['id'].split('/')[-1]

    return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
player_init = self._search_regex(
|
entries = [
|
||||||
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
self._extract_player_init(player_init, display_id)
|
||||||
default=None)
|
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||||
if player_init:
|
entries.extend([
|
||||||
player_info = self._parse_json(player_init, display_id, js_to_json)
|
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
media_id = player_info.get('mediaId')
|
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
|
||||||
if not media_id:
|
return self.playlist_result(
|
||||||
clip_id = player_info['clipId']
|
entries, display_id,
|
||||||
feed = self._download_json(
|
self._og_search_title(webpage, fatal=False),
|
||||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
self._og_search_description(webpage))
|
||||||
clip_id, fatal=False)
|
|
||||||
if feed:
|
|
||||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
|
||||||
if not media_id:
|
|
||||||
media_id = self._download_json(
|
|
||||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
|
||||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
|
||||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
|
||||||
else:
|
|
||||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
|
||||||
return self.playlist_result(entries)
|
|
||||||
|
|
||||||
|
|
||||||
class CBCPlayerIE(InfoExtractor):
|
class CBCPlayerIE(InfoExtractor):
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
@ -31,30 +33,35 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m3u8_url = self._search_regex(
|
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
|
||||||
r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage,
|
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
|
||||||
'playlist', default=None, group='url')
|
|
||||||
|
|
||||||
if not m3u8_url:
|
if not m3u8_formats:
|
||||||
error = self._search_regex(
|
error = self._search_regex(
|
||||||
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
|
||||||
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
|
||||||
webpage, 'error', group='error', default=None)
|
webpage, 'error', group='error', default=None)
|
||||||
if not error:
|
if not error:
|
||||||
if any(p not in webpage for p in (
|
if any(p in webpage for p in (
|
||||||
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
||||||
error = self._ROOM_OFFLINE
|
error = self._ROOM_OFFLINE
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError(error, expected=True)
|
raise ExtractorError(error, expected=True)
|
||||||
raise ExtractorError('Unable to find stream URL')
|
raise ExtractorError('Unable to find stream URL')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
formats = []
|
||||||
|
for m3u8_id, m3u8_url in m3u8_formats:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, ext='mp4',
|
||||||
|
# ffmpeg skips segments for fast m3u8
|
||||||
|
preference=-10 if m3u8_id == 'fast' else None,
|
||||||
|
m3u8_id=m3u8_id, fatal=False, live=True))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(video_id),
|
'title': self._live_title(video_id),
|
||||||
'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id,
|
'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': self._rta_search(webpage),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor):
|
|||||||
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||||
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||||
'duration': 306,
|
'duration': 306,
|
||||||
|
'uploader': 'Gerryaudio',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor):
|
|||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||||
webpage, 'duration', fatal=False))
|
webpage, 'duration', fatal=False))
|
||||||
|
uploader = self._search_regex(
|
||||||
|
r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': audio_id,
|
'id': audio_id,
|
||||||
@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,13 +1,11 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
from ..utils import ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
class CMTIE(MTVIE):
|
class CMTIE(MTVIE):
|
||||||
IE_NAME = 'cmt.com'
|
IE_NAME = 'cmt.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P<id>[^/]+)'
|
||||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||||
@ -33,17 +31,24 @@ class CMTIE(MTVIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
|
||||||
if 'error_not_available.swf' in rtmp_video_url:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
|
||||||
|
|
||||||
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
return self._search_regex(
|
mgid = self._search_regex(
|
||||||
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
|
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
|
||||||
webpage, 'mgid', group='mgid')
|
webpage, 'mgid', group='mgid', default=None)
|
||||||
|
if not mgid:
|
||||||
|
mgid = self._extract_triforce_mgid(webpage)
|
||||||
|
return mgid
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
mgid = self._extract_mgid(webpage)
|
||||||
|
return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
|
||||||
|
@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
|
||||||
feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
|
videos_info = self._get_videos_info(mgid)
|
||||||
feed = self._parse_json(feed_json, playlist_id)
|
|
||||||
zones = feed['manifest']['zones']
|
|
||||||
|
|
||||||
video_zone = zones['t2_lc_promo1']
|
|
||||||
feed = self._download_json(video_zone['feed'], playlist_id)
|
|
||||||
mgid = feed['result']['data']['id']
|
|
||||||
|
|
||||||
videos_info = self._get_videos_info(mgid, use_hls=True)
|
|
||||||
|
|
||||||
return videos_info
|
return videos_info
|
||||||
|
|
||||||
|
|
||||||
@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
|
||||||
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
|
||||||
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
|
||||||
return new_urls
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||||
|
@ -1967,10 +1967,13 @@ class InfoExtractor(object):
|
|||||||
entries.append(media_info)
|
entries.append(media_info)
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_akamai_formats(self, manifest_url, video_id):
|
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||||
formats = []
|
formats = []
|
||||||
hdcore_sign = 'hdcore=3.7.0'
|
hdcore_sign = 'hdcore=3.7.0'
|
||||||
f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||||
|
hds_host = hosts.get('hds')
|
||||||
|
if hds_host:
|
||||||
|
f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
|
||||||
if 'hdcore=' not in f4m_url:
|
if 'hdcore=' not in f4m_url:
|
||||||
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
|
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
|
||||||
f4m_formats = self._extract_f4m_formats(
|
f4m_formats = self._extract_f4m_formats(
|
||||||
@ -1978,7 +1981,10 @@ class InfoExtractor(object):
|
|||||||
for entry in f4m_formats:
|
for entry in f4m_formats:
|
||||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||||
|
hls_host = hosts.get('hls')
|
||||||
|
if hls_host:
|
||||||
|
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
|
from .ustream import UstreamIE
|
||||||
|
|
||||||
|
|
||||||
class CSpanIE(InfoExtractor):
|
class CSpanIE(InfoExtractor):
|
||||||
@ -22,14 +23,13 @@ class CSpanIE(InfoExtractor):
|
|||||||
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
|
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '315139',
|
'id': '315139',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||||
'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
|
|
||||||
},
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||||
'md5': '8e5fbfabe6ad0f89f3012a7943c1287b',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'c4486943',
|
'id': 'c4486943',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -38,14 +38,11 @@ class CSpanIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
|
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
|
||||||
'md5': '2ae5051559169baadba13fc35345ae74',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '342759',
|
'id': '342759',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'General Motors Ignition Switch Recall',
|
'title': 'General Motors Ignition Switch Recall',
|
||||||
'duration': 14848,
|
|
||||||
'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
|
|
||||||
},
|
},
|
||||||
|
'playlist_mincount': 6,
|
||||||
}, {
|
}, {
|
||||||
# Video from senate.gov
|
# Video from senate.gov
|
||||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||||
@ -57,12 +54,30 @@ class CSpanIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # m3u8 downloads
|
'skip_download': True, # m3u8 downloads
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Ustream embedded video
|
||||||
|
'url': 'https://www.c-span.org/video/?114917-1/armed-services',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '58428542',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'USHR07 Armed Services Committee',
|
||||||
|
'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
|
||||||
|
'timestamp': 1423060374,
|
||||||
|
'upload_date': '20150204',
|
||||||
|
'uploader': 'HouseCommittee',
|
||||||
|
'uploader_id': '12987475',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_type = None
|
video_type = None
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
ustream_url = UstreamIE._extract_url(webpage)
|
||||||
|
if ustream_url:
|
||||||
|
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||||
|
|
||||||
# We first look for clipid, because clipprog always appears before
|
# We first look for clipid, because clipprog always appears before
|
||||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||||
|
@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE):
|
|||||||
|
|
||||||
class DramaFeverIE(DramaFeverBaseIE):
|
class DramaFeverIE(DramaFeverBaseIE):
|
||||||
IE_NAME = 'dramafever'
|
IE_NAME = 'dramafever'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE):
|
|||||||
|
|
||||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||||
IE_NAME = 'dramafever:series'
|
IE_NAME = 'dramafever:series'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
|
'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
39
youtube_dl/extractor/egghead.py
Normal file
39
youtube_dl/extractor/egghead.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class EggheadCourseIE(InfoExtractor):
|
||||||
|
IE_DESC = 'egghead.io course'
|
||||||
|
IE_NAME = 'egghead:course'
|
||||||
|
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||||
|
'playlist_count': 29,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'professor-frisby-introduces-composable-functional-javascript',
|
||||||
|
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
||||||
|
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title')
|
||||||
|
ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list')
|
||||||
|
|
||||||
|
found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul)
|
||||||
|
entries = [self.url_result(m) for m in found]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': playlist_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'entries': entries,
|
||||||
|
}
|
@ -77,6 +77,10 @@ from .awaan import (
|
|||||||
AWAANLiveIE,
|
AWAANLiveIE,
|
||||||
AWAANSeasonIE,
|
AWAANSeasonIE,
|
||||||
)
|
)
|
||||||
|
from .azmedien import (
|
||||||
|
AZMedienIE,
|
||||||
|
AZMedienShowIE,
|
||||||
|
)
|
||||||
from .azubu import AzubuIE, AzubuLiveIE
|
from .azubu import AzubuIE, AzubuLiveIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
@ -88,6 +92,7 @@ from .bbc import (
|
|||||||
BBCCoUkPlaylistIE,
|
BBCCoUkPlaylistIE,
|
||||||
BBCIE,
|
BBCIE,
|
||||||
)
|
)
|
||||||
|
from .beampro import BeamProLiveIE
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
from .bellmedia import BellMediaIE
|
from .bellmedia import BellMediaIE
|
||||||
@ -252,6 +257,7 @@ from .dw import (
|
|||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .echomsk import EchoMskIE
|
from .echomsk import EchoMskIE
|
||||||
|
from .egghead import EggheadCourseIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
from .einthusan import EinthusanIE
|
||||||
@ -366,6 +372,7 @@ from .hgtv import (
|
|||||||
)
|
)
|
||||||
from .historicfilms import HistoricFilmsIE
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
|
from .hitrecord import HitRecordIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
from .hotstar import HotStarIE
|
from .hotstar import HotStarIE
|
||||||
@ -393,6 +400,7 @@ from .imgur import (
|
|||||||
ImgurAlbumIE,
|
ImgurAlbumIE,
|
||||||
)
|
)
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
|
from .inc import IncIE
|
||||||
from .indavideo import (
|
from .indavideo import (
|
||||||
IndavideoIE,
|
IndavideoIE,
|
||||||
IndavideoEmbedIE,
|
IndavideoEmbedIE,
|
||||||
@ -716,6 +724,7 @@ from .polskieradio import (
|
|||||||
)
|
)
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .porncom import PornComIE
|
from .porncom import PornComIE
|
||||||
|
from .pornflip import PornFlipIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import (
|
from .pornhub import (
|
||||||
PornHubIE,
|
PornHubIE,
|
||||||
|
@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor):
|
|||||||
title = item['title']
|
title = item['title']
|
||||||
quality = qualities(QUALITIES)
|
quality = qualities(QUALITIES)
|
||||||
formats = []
|
formats = []
|
||||||
|
path = None
|
||||||
for f in item.get('mbr', []):
|
for f in item.get('mbr', []):
|
||||||
src = f.get('src')
|
src = f.get('src')
|
||||||
if not src or not isinstance(src, compat_str):
|
if not src or not isinstance(src, compat_str):
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(self._search_regex(
|
tbr = int_or_none(self._search_regex(
|
||||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||||
|
if not path:
|
||||||
|
path = self._search_regex(
|
||||||
|
r'//[^/]+/(.+?)_\d+\.mp4', src,
|
||||||
|
'm3u8 path', default=None)
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': src,
|
'url': src,
|
||||||
'format_id': f.get('name'),
|
'format_id': f.get('name'),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'quality': quality(f.get('name')),
|
'source_preference': quality(f.get('name')),
|
||||||
})
|
})
|
||||||
|
# m3u8 URL format is reverse engineered from [1] (search for
|
||||||
|
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||||
|
# is taken from [2].
|
||||||
|
# 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
|
||||||
|
# 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
|
||||||
|
if not path and len(formats) == 1:
|
||||||
|
path = self._search_regex(
|
||||||
|
r'//[^/]+/(.+?$)', formats[0]['url'],
|
||||||
|
'm3u8 path', default=None)
|
||||||
|
if path:
|
||||||
|
if len(formats) == 1:
|
||||||
|
m3u8_path = ','
|
||||||
|
else:
|
||||||
|
tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)]
|
||||||
|
m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4')
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8'
|
||||||
|
% (path, m3u8_path),
|
||||||
|
display_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
|
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
|
||||||
|
@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor):
|
|||||||
'filesize': int_or_none(cover.get('size')),
|
'filesize': int_or_none(cover.get('size')),
|
||||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||||
|
|
||||||
# Note that this only retrieves comments that are initally loaded.
|
# Note that this only retrieves comments that are initially loaded.
|
||||||
# For videos with large amounts of comments, most won't be retrieved.
|
# For videos with large amounts of comments, most won't be retrieved.
|
||||||
comments = []
|
comments = []
|
||||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||||
|
@ -3,10 +3,16 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_by_id,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FreesoundIE(InfoExtractor):
|
class FreesoundIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
|
'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
|
||||||
'md5': '12280ceb42c81f19a515c745eae07650',
|
'md5': '12280ceb42c81f19a515c745eae07650',
|
||||||
@ -14,26 +20,60 @@ class FreesoundIE(InfoExtractor):
|
|||||||
'id': '194503',
|
'id': '194503',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'gulls in the city.wav',
|
'title': 'gulls in the city.wav',
|
||||||
'uploader': 'miklovan',
|
|
||||||
'description': 'the sounds of seagulls in the city',
|
'description': 'the sounds of seagulls in the city',
|
||||||
|
'duration': 130.233,
|
||||||
|
'uploader': 'miklovan',
|
||||||
|
'upload_date': '20130715',
|
||||||
|
'tags': list,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
audio_id = self._match_id(url)
|
||||||
music_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, music_id)
|
webpage = self._download_webpage(url, audio_id)
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
audio_url = self._og_search_property('audio', webpage, 'song url')
|
||||||
webpage, 'music title', flags=re.DOTALL)
|
title = self._og_search_property('audio:title', webpage, 'song title')
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div id="sound_description">(.*?)</div>', webpage, 'description',
|
r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
|
||||||
fatal=False, flags=re.DOTALL)
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
|
duration = float_or_none(
|
||||||
|
get_element_by_class('duration', webpage), scale=1000)
|
||||||
|
|
||||||
|
upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
|
||||||
|
uploader = self._og_search_property(
|
||||||
|
'audio:artist', webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
|
channels = self._html_search_regex(
|
||||||
|
r'Channels</dt><dd>(.+?)</dd>', webpage,
|
||||||
|
'channels info', fatal=False)
|
||||||
|
|
||||||
|
tags_str = get_element_by_class('tags', webpage)
|
||||||
|
tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
|
||||||
|
|
||||||
|
audio_urls = [audio_url]
|
||||||
|
|
||||||
|
LQ_FORMAT = '-lq.mp3'
|
||||||
|
if LQ_FORMAT in audio_url:
|
||||||
|
audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': format_url,
|
||||||
|
'format_note': channels,
|
||||||
|
'quality': quality,
|
||||||
|
} for quality, format_url in enumerate(audio_urls)]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': music_id,
|
'id': audio_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': self._og_search_property('audio', webpage, 'music url'),
|
|
||||||
'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'),
|
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'tags': tags,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -78,6 +78,8 @@ from .vbox7 import Vbox7IE
|
|||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
from .piksel import PikselIE
|
from .piksel import PikselIE
|
||||||
from .videa import VideaIE
|
from .videa import VideaIE
|
||||||
|
from .twentymin import TwentyMinutenIE
|
||||||
|
from .ustream import UstreamIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -422,6 +424,26 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True, # m3u8 download
|
'skip_download': True, # m3u8 download
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Brightcove with alternative playerID key
|
||||||
|
'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nmeth.2062_SV1',
|
||||||
|
'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2228375078001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'nmeth.2062-sv1',
|
||||||
|
'description': 'nmeth.2062-sv1',
|
||||||
|
'timestamp': 1363357591,
|
||||||
|
'upload_date': '20130315',
|
||||||
|
'uploader': 'Nature Publishing Group',
|
||||||
|
'uploader_id': '1964492299001',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
},
|
||||||
# ooyala video
|
# ooyala video
|
||||||
{
|
{
|
||||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||||
@ -567,17 +589,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
|
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Embedded Ustream video
|
|
||||||
{
|
|
||||||
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
|
|
||||||
'md5': '27b99cdb639c9b12a79bca876a073417',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '45734260',
|
|
||||||
'ext': 'flv',
|
|
||||||
'uploader': 'AU SPA: The NSA and Privacy',
|
|
||||||
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# nowvideo embed hidden behind percent encoding
|
# nowvideo embed hidden behind percent encoding
|
||||||
{
|
{
|
||||||
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
||||||
@ -1448,6 +1459,20 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# 20 minuten embed
|
||||||
|
'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '523629',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'So kommen Sie bei Eis und Schnee sicher an',
|
||||||
|
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||||
|
}
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
# # http://schema.org/VideoObject
|
# # http://schema.org/VideoObject
|
||||||
@ -1939,7 +1964,14 @@ class GenericIE(InfoExtractor):
|
|||||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
||||||
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
|
embed_token = self._search_regex(
|
||||||
|
r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
|
||||||
|
webpage, 'ooyala embed token', default=None)
|
||||||
|
return OoyalaIE._build_url_result(smuggle_url(
|
||||||
|
mobj.group('ec'), {
|
||||||
|
'domain': url,
|
||||||
|
'embed_token': embed_token,
|
||||||
|
}))
|
||||||
|
|
||||||
# Look for multiple Ooyala embeds on SBN network websites
|
# Look for multiple Ooyala embeds on SBN network websites
|
||||||
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
|
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
|
||||||
@ -2070,10 +2102,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'), 'TED')
|
return self.url_result(mobj.group('url'), 'TED')
|
||||||
|
|
||||||
# Look for embedded Ustream videos
|
# Look for embedded Ustream videos
|
||||||
mobj = re.search(
|
ustream_url = UstreamIE._extract_url(webpage)
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
if ustream_url:
|
||||||
if mobj is not None:
|
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||||
return self.url_result(mobj.group('url'), 'Ustream')
|
|
||||||
|
|
||||||
# Look for embedded arte.tv player
|
# Look for embedded arte.tv player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
@ -2394,6 +2425,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if videa_urls:
|
if videa_urls:
|
||||||
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
|
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
|
||||||
|
|
||||||
|
# Look for 20 minuten embeds
|
||||||
|
twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
|
||||||
|
if twentymin_urls:
|
||||||
|
return _playlist_from_matches(
|
||||||
|
twentymin_urls, ie=TwentyMinutenIE.ie_key())
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
|
68
youtube_dl/extractor/hitrecord.py
Normal file
68
youtube_dl/extractor/hitrecord.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HitRecordIE(InfoExtractor):
    # Extractor for single records hosted on hitrecord.org; the numeric
    # record id is taken straight from the URL.
    _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P<id>\d+)'
    _TEST = {
        'url': 'https://hitrecord.org/records/2954362',
        'md5': 'fe1cdc2023bce0bbb95c39c57426aa71',
        'info_dict': {
            'id': '2954362',
            'ext': 'mp4',
            'title': 'A Very Different World (HITRECORD x ACLU)',
            'description': 'md5:e62defaffab5075a5277736bead95a3d',
            'duration': 139.327,
            'timestamp': 1471557582,
            'upload_date': '20160818',
            'uploader': 'Zuzi.C12',
            'uploader_id': '362811',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'tags': list,
        }
    }

    def _real_extract(self, url):
        # Fetch the record's JSON description and map it onto an info dict.
        # 'title' and 'source_url'/'mp4_url' are mandatory (a missing key
        # raises); every other field is looked up defensively.
        video_id = self._match_id(url)

        record = self._download_json(
            'https://hitrecord.org/api/web/records/%s' % video_id, video_id)

        title = record['title']
        video_url = record['source_url']['mp4_url']

        # Keep only tag entries that are dicts carrying a non-empty text
        # string; 'tags' stays None when the record has no usable tag list.
        tags = None
        raw_tags = try_get(record, lambda x: x['tags'], list)
        if raw_tags:
            tags = []
            for entry in raw_tags:
                if not isinstance(entry, dict):
                    continue
                text = entry.get('text')
                if text and isinstance(text, compat_str):
                    tags.append(text)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': clean_html(record.get('body')),
            # 'duration' is divided by 1000 before being reported
            'duration': float_or_none(record.get('duration'), 1000),
            'timestamp': int_or_none(record.get('created_at_i')),
            'uploader': try_get(
                record, lambda x: x['user']['username'], compat_str),
            'uploader_id': try_get(
                record, lambda x: compat_str(x['user']['id'])),
            'view_count': int_or_none(record.get('total_views_count')),
            'like_count': int_or_none(record.get('hearts_count')),
            'comment_count': int_or_none(record.get('comments_count')),
            'tags': tags,
        }
|
@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
class ImdbIE(InfoExtractor):
|
class ImdbIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb'
|
IE_NAME = 'imdb'
|
||||||
IE_DESC = 'Internet Movie Database trailers'
|
IE_DESC = 'Internet Movie Database trailers'
|
||||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
|
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.imdb.com/videoplayer/vi1562949145',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
41
youtube_dl/extractor/inc.py
Normal file
41
youtube_dl/extractor/inc.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .kaltura import KalturaIE
|
||||||
|
|
||||||
|
|
||||||
|
class IncIE(InfoExtractor):
    # Extractor for article/video pages on inc.com. The page itself only
    # carries a Kaltura partner id and entry id, so extraction is delegated
    # to KalturaIE through a 'kaltura:<partner_id>:<entry_id>' URL.
    #
    # The dot before 'html' is escaped so that only a literal '.html'
    # suffix matches (an unescaped '.' would accept any character there).
    _VALID_URL = r'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P<id>[^.]+)\.html'
    _TESTS = [{
        'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html',
        'md5': '7416739c9c16438c09fa35619d6ba5cb',
        'info_dict': {
            'id': '1_wqig47aq',
            'ext': 'mov',
            'title': 'Bill Gates Says These 5 Books Will Make You Smarter',
            'description': 'md5:bea7ff6cce100886fc1995acb743237e',
            'timestamp': 1474414430,
            'upload_date': '20160920',
            'uploader_id': 'video@inc.com',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The last path component (without '.html') is used as display id.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Partner id is exposed in an inline script variable.
        partner_id = self._search_regex(
            r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')

        # 'pageInfo.videos' is a JS array literal; the captured content
        # between the brackets is parsed as a single JSON object.
        kaltura_id = self._parse_json(self._search_regex(
            r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
            display_id)['vid_kaltura_id']

        return self.url_result(
            'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
|
@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
format_id = 'rtmp'
|
format_id = 'rtmp'
|
||||||
if stream.get('videoBitRate'):
|
if stream.get('videoBitRate'):
|
||||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||||
http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
|
http_format_id = format_id.replace('rtmp', 'http')
|
||||||
urls.append(http_url)
|
|
||||||
http_fmt = fmt.copy()
|
CDN_HOSTS = (
|
||||||
http_fmt.update({
|
('delvenetworks.com', 'cpl.delvenetworks.com'),
|
||||||
'url': http_url,
|
('video.llnw.net', 's2.content.video.llnw.net'),
|
||||||
'format_id': format_id.replace('rtmp', 'http'),
|
)
|
||||||
})
|
for cdn_host, http_host in CDN_HOSTS:
|
||||||
formats.append(http_fmt)
|
if cdn_host not in rtmp.group('host').lower():
|
||||||
|
continue
|
||||||
|
http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
|
||||||
|
urls.append(http_url)
|
||||||
|
if self._is_valid_url(http_url, video_id, http_format_id):
|
||||||
|
http_fmt = fmt.copy()
|
||||||
|
http_fmt.update({
|
||||||
|
'url': http_url,
|
||||||
|
'format_id': http_format_id,
|
||||||
|
})
|
||||||
|
formats.append(http_fmt)
|
||||||
|
break
|
||||||
|
|
||||||
fmt.update({
|
fmt.update({
|
||||||
'url': rtmp.group('url'),
|
'url': rtmp.group('url'),
|
||||||
'play_path': rtmp.group('playpath'),
|
'play_path': rtmp.group('playpath'),
|
||||||
|
@ -190,7 +190,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
# for some reason only HLS is supported
|
# for some reason only HLS is supported
|
||||||
'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
|
'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -16,7 +16,6 @@ from ..utils import (
|
|||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
parse_count,
|
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -36,7 +35,6 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
@ -49,7 +47,6 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*',
|
'thumbnail': 're:https?://.*',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
||||||
@ -89,26 +86,18 @@ class MixcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
song_url = play_info['stream_url']
|
song_url = play_info['stream_url']
|
||||||
|
|
||||||
PREFIX = (
|
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
|
||||||
r'm-play-on-spacebar[^>]+'
|
|
||||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
|
||||||
title = self._html_search_regex(
|
|
||||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
|
||||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||||
PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
|
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
|
||||||
fatal=False))
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
PREFIX + r'm-owner-name="([^"]+)"',
|
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
|
||||||
webpage, 'uploader', fatal=False)
|
|
||||||
uploader_id = self._search_regex(
|
uploader_id = self._search_regex(
|
||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
like_count = parse_count(self._search_regex(
|
|
||||||
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
|
|
||||||
webpage, 'like count', default=None))
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||||
r'/listeners/?">([0-9,.]+)</a>'],
|
r'/listeners/?">([0-9,.]+)</a>',
|
||||||
|
r'm-tooltip=["\']([\d,.]+) plays'],
|
||||||
webpage, 'play count', default=None))
|
webpage, 'play count', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -120,7 +109,6 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,11 +13,11 @@ from ..utils import (
|
|||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
NO_DEFAULT,
|
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_basename,
|
url_basename,
|
||||||
@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
# Remove the templates, like &device={device}
|
# Remove the templates, like &device={device}
|
||||||
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
|
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
|
||||||
|
|
||||||
# This was originally implemented for ComedyCentral, but it also works here
|
|
||||||
@classmethod
|
|
||||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
|
||||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
|
||||||
if not m:
|
|
||||||
return {'rtmp': rtmp_video_url}
|
|
||||||
base = 'http://viacommtvstrmfs.fplive.net/'
|
|
||||||
return {'http': base + m.group('finalid')}
|
|
||||||
|
|
||||||
def _get_feed_url(self, uri):
|
def _get_feed_url(self, uri):
|
||||||
return self._FEED_URL
|
return self._FEED_URL
|
||||||
|
|
||||||
@ -88,24 +79,31 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in mdoc.findall('.//rendition'):
|
for rendition in mdoc.findall('.//rendition'):
|
||||||
if rendition.attrib['method'] == 'hls':
|
if rendition.get('method') == 'hls':
|
||||||
hls_url = rendition.find('./src').text
|
hls_url = rendition.find('./src').text
|
||||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4'))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls'))
|
||||||
else:
|
else:
|
||||||
# fms
|
# fms
|
||||||
try:
|
try:
|
||||||
_, _, ext = rendition.attrib['type'].partition('/')
|
_, _, ext = rendition.attrib['type'].partition('/')
|
||||||
rtmp_video_url = rendition.find('./src').text
|
rtmp_video_url = rendition.find('./src').text
|
||||||
|
if 'error_not_available.swf' in rtmp_video_url:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: video is not available' % self.IE_NAME,
|
||||||
|
expected=True)
|
||||||
if rtmp_video_url.endswith('siteunavail.png'):
|
if rtmp_video_url.endswith('siteunavail.png'):
|
||||||
continue
|
continue
|
||||||
new_urls = self._transform_rtmp_url(rtmp_video_url)
|
|
||||||
formats.extend([{
|
formats.extend([{
|
||||||
'ext': 'flv' if new_url.startswith('rtmp') else ext,
|
'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
|
||||||
'url': new_url,
|
'url': rtmp_video_url,
|
||||||
'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])),
|
'format_id': '-'.join(filter(None, [
|
||||||
|
'rtmp' if rtmp_video_url.startswith('rtmp') else None,
|
||||||
|
rendition.get('bitrate')])),
|
||||||
'width': int(rendition.get('width')),
|
'width': int(rendition.get('width')),
|
||||||
'height': int(rendition.get('height')),
|
'height': int(rendition.get('height')),
|
||||||
} for kind, new_url in new_urls.items()])
|
}])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise ExtractorError('Invalid rendition field.')
|
raise ExtractorError('Invalid rendition field.')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -123,7 +121,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
} for typographic in transcript.findall('./typographic')]
|
} for typographic in transcript.findall('./typographic')]
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _get_video_info(self, itemdoc, use_hls):
|
def _get_video_info(self, itemdoc, use_hls=True):
|
||||||
uri = itemdoc.find('guid').text
|
uri = itemdoc.find('guid').text
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
@ -193,13 +191,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
data['lang'] = self._LANG
|
data['lang'] = self._LANG
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _get_videos_info(self, uri, use_hls=False):
|
def _get_videos_info(self, uri, use_hls=True):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
feed_url = self._get_feed_url(uri)
|
feed_url = self._get_feed_url(uri)
|
||||||
info_url = update_url_query(feed_url, self._get_feed_query(uri))
|
info_url = update_url_query(feed_url, self._get_feed_query(uri))
|
||||||
return self._get_videos_info_from_url(info_url, video_id, use_hls)
|
return self._get_videos_info_from_url(info_url, video_id, use_hls)
|
||||||
|
|
||||||
def _get_videos_info_from_url(self, url, video_id, use_hls):
|
def _get_videos_info_from_url(self, url, video_id, use_hls=True):
|
||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
url, video_id,
|
url, video_id,
|
||||||
'Downloading info', transform_source=fix_xml_ampersands)
|
'Downloading info', transform_source=fix_xml_ampersands)
|
||||||
@ -211,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
[self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
|
[self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
|
||||||
playlist_title=title, playlist_description=description)
|
playlist_title=title, playlist_description=description)
|
||||||
|
|
||||||
def _extract_mgid(self, webpage, default=NO_DEFAULT):
|
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
    # Resolve an mgid through the "triforce" manifest embedded in the page:
    # read the manifest, pick the feed URL of the promo data zone, download
    # that feed and return the id it reports. Returns None whenever any of
    # these steps yields nothing.
    manifest_json = self._search_regex(
        r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
        'triforce feed', default='{}')
    manifest = self._parse_json(manifest_json, video_id, fatal=False)

    # A zone found in the page takes precedence; the caller-supplied
    # data_zone is only the fallback.
    zone = self._search_regex(
        r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
        'data zone', default=data_zone, group='zone')

    feed_url = try_get(
        manifest, lambda x: x['manifest']['zones'][zone]['feed'], compat_str)
    if not feed_url:
        return None

    feed = self._download_json(feed_url, video_id, fatal=False)
    if feed:
        return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
    return None
|
||||||
|
|
||||||
|
def _extract_mgid(self, webpage):
|
||||||
try:
|
try:
|
||||||
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
||||||
# or http://media.mtvnservices.com/{mgid}
|
# or http://media.mtvnservices.com/{mgid}
|
||||||
@ -231,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
sm4_embed = self._html_search_meta(
|
sm4_embed = self._html_search_meta(
|
||||||
'sm4:video:embed', webpage, 'sm4 embed', default='')
|
'sm4:video:embed', webpage, 'sm4 embed', default='')
|
||||||
mgid = self._search_regex(
|
mgid = self._search_regex(
|
||||||
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
|
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
|
||||||
|
|
||||||
|
if not mgid:
|
||||||
|
mgid = self._extract_triforce_mgid(webpage)
|
||||||
|
|
||||||
return mgid
|
return mgid
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -12,10 +12,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NaverIE(InfoExtractor):
|
class NaverIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tvcast.naver.com/v/81652',
|
'url': 'http://tv.naver.com/v/81652',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '81652',
|
'id': '81652',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -24,7 +24,7 @@ class NaverIE(InfoExtractor):
|
|||||||
'upload_date': '20130903',
|
'upload_date': '20130903',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tvcast.naver.com/v/395837',
|
'url': 'http://tv.naver.com/v/395837',
|
||||||
'md5': '638ed4c12012c458fefcddfd01f173cd',
|
'md5': '638ed4c12012c458fefcddfd01f173cd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '395837',
|
'id': '395837',
|
||||||
@ -34,6 +34,9 @@ class NaverIE(InfoExtractor):
|
|||||||
'upload_date': '20150519',
|
'upload_date': '20150519',
|
||||||
},
|
},
|
||||||
'skip': 'Georestricted',
|
'skip': 'Georestricted',
|
||||||
|
}, {
|
||||||
|
'url': 'http://tvcast.naver.com/v/81652',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import parse_iso8601
|
from ..utils import parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
@ -30,6 +31,12 @@ class NextMediaIE(InfoExtractor):
|
|||||||
return self._extract_from_nextmedia_page(news_id, url, page)
|
return self._extract_from_nextmedia_page(news_id, url, page)
|
||||||
|
|
||||||
def _extract_from_nextmedia_page(self, news_id, url, page):
|
def _extract_from_nextmedia_page(self, news_id, url, page):
|
||||||
|
redirection_url = self._search_regex(
|
||||||
|
r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1',
|
||||||
|
page, 'redirection URL', default=None, group='url')
|
||||||
|
if redirection_url:
|
||||||
|
return self.url_result(compat_urlparse.urljoin(url, redirection_url))
|
||||||
|
|
||||||
title = self._fetch_title(page)
|
title = self._fetch_title(page)
|
||||||
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
|
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
|
||||||
|
|
||||||
@ -93,7 +100,7 @@ class NextMediaActionNewsIE(NextMediaIE):
|
|||||||
|
|
||||||
class AppleDailyIE(NextMediaIE):
|
class AppleDailyIE(NextMediaIE):
|
||||||
IE_DESC = '臺灣蘋果日報'
|
IE_DESC = '臺灣蘋果日報'
|
||||||
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
_VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||||
@ -157,6 +164,10 @@ class AppleDailyIE(NextMediaIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
|
'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694
|
||||||
|
'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||||
|
@ -10,7 +10,7 @@ from ..utils import update_url_query
|
|||||||
class NickIE(MTVServicesInfoExtractor):
|
class NickIE(MTVServicesInfoExtractor):
|
||||||
# None of videos on the website are still alive?
|
# None of videos on the website are still alive?
|
||||||
IE_NAME = 'nick.com'
|
IE_NAME = 'nick.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
_VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
||||||
@ -57,6 +57,9 @@ class NickIE(MTVServicesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
|
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://beta.nick.com/nicky-ricky-dicky-and-dawn/videos/nicky-ricky-dicky-dawn-301-full-episode/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
|
@ -7,7 +7,6 @@ import datetime
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||||
'duration': 33,
|
'duration': 33,
|
||||||
},
|
},
|
||||||
|
'skip': 'Requires an account',
|
||||||
}, {
|
}, {
|
||||||
# File downloaded with and without credentials are different, so omit
|
# File downloaded with and without credentials are different, so omit
|
||||||
# the md5 field
|
# the md5 field
|
||||||
@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'timestamp': 1304065916,
|
'timestamp': 1304065916,
|
||||||
'duration': 209,
|
'duration': 209,
|
||||||
},
|
},
|
||||||
|
'skip': 'Requires an account',
|
||||||
}, {
|
}, {
|
||||||
# 'video exists but is marked as "deleted"
|
# 'video exists but is marked as "deleted"
|
||||||
# md5 is unstable
|
# md5 is unstable
|
||||||
@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'description': 'deleted',
|
'description': 'deleted',
|
||||||
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
|
||||||
'upload_date': '20071224',
|
'upload_date': '20071224',
|
||||||
'timestamp': 1198527840, # timestamp field has different value if logged in
|
'timestamp': int, # timestamp field has different value if logged in
|
||||||
'duration': 304,
|
'duration': 304,
|
||||||
},
|
},
|
||||||
|
'skip': 'Requires an account',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nicovideo.jp/watch/so22543406',
|
'url': 'http://www.nicovideo.jp/watch/so22543406',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -79,13 +81,12 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'upload_date': '20140104',
|
'upload_date': '20140104',
|
||||||
'uploader': 'アニメロチャンネル',
|
'uploader': 'アニメロチャンネル',
|
||||||
'uploader_id': '312',
|
'uploader_id': '312',
|
||||||
}
|
},
|
||||||
|
'skip': 'The viewing period of the video you were searching for has expired.',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
# Determine whether the downloader used authentication to download video
|
|
||||||
_AUTHENTICATED = False
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
@ -109,8 +110,6 @@ class NiconicoIE(InfoExtractor):
|
|||||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
# Successful login
|
|
||||||
self._AUTHENTICATED = True
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -128,35 +127,19 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||||
note='Downloading video info page')
|
note='Downloading video info page')
|
||||||
|
|
||||||
if self._AUTHENTICATED:
|
# Get flv info
|
||||||
# Get flv info
|
flv_info_webpage = self._download_webpage(
|
||||||
flv_info_webpage = self._download_webpage(
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
video_id, 'Downloading flv info')
|
||||||
video_id, 'Downloading flv info')
|
|
||||||
else:
|
|
||||||
# Get external player info
|
|
||||||
ext_player_info = self._download_webpage(
|
|
||||||
'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
|
|
||||||
thumb_play_key = self._search_regex(
|
|
||||||
r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
|
|
||||||
|
|
||||||
# Get flv info
|
|
||||||
flv_info_data = compat_urllib_parse_urlencode({
|
|
||||||
'k': thumb_play_key,
|
|
||||||
'v': video_id
|
|
||||||
})
|
|
||||||
flv_info_request = sanitized_Request(
|
|
||||||
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
|
|
||||||
{'Content-Type': 'application/x-www-form-urlencoded'})
|
|
||||||
flv_info_webpage = self._download_webpage(
|
|
||||||
flv_info_request, video_id,
|
|
||||||
note='Downloading flv info', errnote='Unable to download flv info')
|
|
||||||
|
|
||||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||||
if 'url' not in flv_info:
|
if 'url' not in flv_info:
|
||||||
if 'deleted' in flv_info:
|
if 'deleted' in flv_info:
|
||||||
raise ExtractorError('The video has been deleted.',
|
raise ExtractorError('The video has been deleted.',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
elif 'closed' in flv_info:
|
||||||
|
raise ExtractorError('Niconico videos now require logging in',
|
||||||
|
expected=True)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unable to find video URL')
|
raise ExtractorError('Unable to find video URL')
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
|
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
|
||||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
|
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
|
||||||
|
|
||||||
def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
|
def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None):
|
||||||
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
|
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
|
||||||
metadata = content_tree[list(content_tree)[0]]
|
metadata = content_tree[list(content_tree)[0]]
|
||||||
embed_code = metadata['embed_code']
|
embed_code = metadata['embed_code']
|
||||||
@ -29,7 +29,8 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||||
compat_urllib_parse_urlencode({
|
compat_urllib_parse_urlencode({
|
||||||
'domain': domain,
|
'domain': domain,
|
||||||
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
|
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
|
||||||
|
'embedToken': embed_token,
|
||||||
}), video_id)
|
}), video_id)
|
||||||
|
|
||||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||||
@ -52,6 +53,12 @@ class OoyalaBaseIE(InfoExtractor):
|
|||||||
elif delivery_type == 'hds' or ext == 'f4m':
|
elif delivery_type == 'hds' or ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||||
|
elif delivery_type == 'dash' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
s_url, embed_code, mpd_id='dash', fatal=False))
|
||||||
|
elif delivery_type == 'smooth':
|
||||||
|
self._extract_ism_formats(
|
||||||
|
s_url, embed_code, ism_id='mss', fatal=False)
|
||||||
elif ext == 'smil':
|
elif ext == 'smil':
|
||||||
formats.extend(self._extract_smil_formats(
|
formats.extend(self._extract_smil_formats(
|
||||||
s_url, embed_code, fatal=False))
|
s_url, embed_code, fatal=False))
|
||||||
@ -146,8 +153,9 @@ class OoyalaIE(OoyalaBaseIE):
|
|||||||
embed_code = self._match_id(url)
|
embed_code = self._match_id(url)
|
||||||
domain = smuggled_data.get('domain')
|
domain = smuggled_data.get('domain')
|
||||||
supportedformats = smuggled_data.get('supportedformats')
|
supportedformats = smuggled_data.get('supportedformats')
|
||||||
|
embed_token = smuggled_data.get('embed_token')
|
||||||
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
|
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
|
||||||
return self._extract(content_tree_url, embed_code, domain, supportedformats)
|
return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token)
|
||||||
|
|
||||||
|
|
||||||
class OoyalaExternalIE(OoyalaBaseIE):
|
class OoyalaExternalIE(OoyalaBaseIE):
|
||||||
|
@ -64,16 +64,17 @@ class OpenloadIE(InfoExtractor):
|
|||||||
raise ExtractorError('File not found', expected=True)
|
raise ExtractorError('File not found', expected=True)
|
||||||
|
|
||||||
ol_id = self._search_regex(
|
ol_id = self._search_regex(
|
||||||
'<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>',
|
'<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>',
|
||||||
webpage, 'openload ID')
|
webpage, 'openload ID')
|
||||||
|
|
||||||
first_two_chars = int(float(ol_id[0:][:2]))
|
first_three_chars = int(float(ol_id[0:][:3]))
|
||||||
|
fifth_char = int(float(ol_id[3:5]))
|
||||||
urlcode = ''
|
urlcode = ''
|
||||||
num = 2
|
num = 5
|
||||||
|
|
||||||
while num < len(ol_id):
|
while num < len(ol_id):
|
||||||
urlcode += compat_chr(int(float(ol_id[num:][:3])) -
|
urlcode += compat_chr(int(float(ol_id[num:][:3])) +
|
||||||
first_two_chars * int(float(ol_id[num + 3:][:2])))
|
first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2])))
|
||||||
num += 5
|
num += 5
|
||||||
|
|
||||||
video_url = 'https://openload.co/stream/' + urlcode
|
video_url = 'https://openload.co/stream/' + urlcode
|
||||||
|
92
youtube_dl/extractor/pornflip.py
Normal file
92
youtube_dl/extractor/pornflip.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PornFlipIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
|
||||||
|
'md5': '98c46639849145ae1fd77af532a9278c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'wz7DfNhMmep',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '2 Amateurs swallow make his dream cumshots true',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 112,
|
||||||
|
'timestamp': 1481655502,
|
||||||
|
'upload_date': '20161213',
|
||||||
|
'uploader_id': '106786',
|
||||||
|
'uploader': 'figifoto',
|
||||||
|
'view_count': int,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'https://www.pornflip.com/v/%s' % video_id, video_id)
|
||||||
|
|
||||||
|
flashvars = compat_parse_qs(self._search_regex(
|
||||||
|
r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'flashvars', group='flashvars'))
|
||||||
|
|
||||||
|
title = flashvars['video_vars[title]'][0]
|
||||||
|
|
||||||
|
def flashvar(kind):
|
||||||
|
return try_get(
|
||||||
|
flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for key, value in flashvars.items():
|
||||||
|
if not (value and isinstance(value, list)):
|
||||||
|
continue
|
||||||
|
format_url = value[0]
|
||||||
|
if key == 'video_vars[hds_manifest]':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
continue
|
||||||
|
height = self._search_regex(
|
||||||
|
r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
|
||||||
|
if not height:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': 'http-%s' % height,
|
||||||
|
'height': int_or_none(height),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
(r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
|
||||||
|
r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
|
||||||
|
webpage, 'uploader', fatal=False, group='uploader')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': flashvar('big_thumb'),
|
||||||
|
'duration': int_or_none(flashvar('duration')),
|
||||||
|
'timestamp': unified_timestamp(self._html_search_meta(
|
||||||
|
'uploadDate', webpage, 'timestamp')),
|
||||||
|
'uploader_id': flashvar('author_id'),
|
||||||
|
'uploader': uploader,
|
||||||
|
'view_count': int_or_none(flashvar('views')),
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor):
|
|||||||
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
|
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None)
|
mgid = super(SpikeIE, self)._extract_mgid(webpage)
|
||||||
if mgid is None:
|
if mgid is None:
|
||||||
url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
|
url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
|
||||||
video_type, episode_id = url_parts.split('/', 1)
|
video_type, episode_id = url_parts.split('/', 1)
|
||||||
|
@ -4,11 +4,10 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
update_url_query,
|
determine_ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -28,7 +27,7 @@ class TV4IE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
|
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
|
||||||
'md5': '909d6454b87b10a25aa04c4bdd416a9b',
|
'md5': 'cb837212f342d77cec06e6dad190e96d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2491650',
|
'id': '2491650',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -40,7 +39,7 @@ class TV4IE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.tv4play.se/iframe/video/3054113',
|
'url': 'http://www.tv4play.se/iframe/video/3054113',
|
||||||
'md5': '77f851c55139ffe0ebd41b6a5552489b',
|
'md5': 'cb837212f342d77cec06e6dad190e96d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3054113',
|
'id': '3054113',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -75,11 +74,10 @@ class TV4IE(InfoExtractor):
|
|||||||
# If is_geo_restricted is true, it doesn't necessarily mean we can't download it
|
# If is_geo_restricted is true, it doesn't necessarily mean we can't download it
|
||||||
if info.get('is_geo_restricted'):
|
if info.get('is_geo_restricted'):
|
||||||
self.report_warning('This content might not be available in your country due to licensing restrictions.')
|
self.report_warning('This content might not be available in your country due to licensing restrictions.')
|
||||||
if info.get('requires_subscription'):
|
|
||||||
raise ExtractorError('This content requires subscription.', expected=True)
|
|
||||||
|
|
||||||
title = info['title']
|
title = info['title']
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
formats = []
|
formats = []
|
||||||
# http formats are linked with unresolvable host
|
# http formats are linked with unresolvable host
|
||||||
for kind in ('hls', ''):
|
for kind in ('hls', ''):
|
||||||
@ -87,26 +85,41 @@ class TV4IE(InfoExtractor):
|
|||||||
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
|
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
|
||||||
video_id, 'Downloading sources JSON', query={
|
video_id, 'Downloading sources JSON', query={
|
||||||
'protocol': kind,
|
'protocol': kind,
|
||||||
'videoFormat': 'MP4+WEBVTTS+WEBVTT',
|
'videoFormat': 'MP4+WEBVTT',
|
||||||
})
|
})
|
||||||
item = try_get(data, lambda x: x['playback']['items']['item'], dict)
|
items = try_get(data, lambda x: x['playback']['items']['item'])
|
||||||
manifest_url = item.get('url')
|
if not items:
|
||||||
if not isinstance(manifest_url, compat_str):
|
|
||||||
continue
|
continue
|
||||||
if kind == 'hls':
|
if isinstance(items, dict):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
items = [items]
|
||||||
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
for item in items:
|
||||||
m3u8_id=kind, fatal=False))
|
manifest_url = item.get('url')
|
||||||
else:
|
if not isinstance(manifest_url, compat_str):
|
||||||
formats.extend(self._extract_f4m_formats(
|
continue
|
||||||
update_url_query(manifest_url, {'hdcore': '3.8.0'}),
|
ext = determine_ext(manifest_url)
|
||||||
video_id, f4m_id='hds', fatal=False))
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=kind, fatal=False))
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_akamai_formats(
|
||||||
|
manifest_url, video_id, {
|
||||||
|
'hls': 'tv4play-i.akamaihd.net',
|
||||||
|
}))
|
||||||
|
elif ext == 'webvtt':
|
||||||
|
subtitles = self._merge_subtitles(
|
||||||
|
subtitles, {
|
||||||
|
'sv': [{
|
||||||
|
'url': manifest_url,
|
||||||
|
'ext': 'vtt',
|
||||||
|
}]})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
|
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class TwentyFourVideoIE(InfoExtractor):
|
class TwentyFourVideoIE(InfoExtractor):
|
||||||
IE_NAME = '24video'
|
IE_NAME = '24video'
|
||||||
_VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.24video.net/video/view/1044982',
|
'url': 'http://www.24video.net/video/view/1044982',
|
||||||
@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.24video.net/video/view/%s' % video_id, video_id)
|
'http://www.24video.sex/video/view/%s' % video_id, video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
# Sets some cookies
|
# Sets some cookies
|
||||||
self._download_xml(
|
self._download_xml(
|
||||||
r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
|
r'http://www.24video.sex/video/xml/%s?mode=init' % video_id,
|
||||||
video_id, 'Downloading init XML')
|
video_id, 'Downloading init XML')
|
||||||
|
|
||||||
video_xml = self._download_xml(
|
video_xml = self._download_xml(
|
||||||
'http://www.24video.net/video/xml/%s?mode=play' % video_id,
|
'http://www.24video.sex/video/xml/%s?mode=play' % video_id,
|
||||||
video_id, 'Downloading video XML')
|
video_id, 'Downloading video XML')
|
||||||
|
|
||||||
video = xpath_element(video_xml, './/video', 'video', fatal=True)
|
video = xpath_element(video_xml, './/video', 'video', fatal=True)
|
||||||
|
@ -4,91 +4,88 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TwentyMinutenIE(InfoExtractor):
|
class TwentyMinutenIE(InfoExtractor):
|
||||||
IE_NAME = '20min'
|
IE_NAME = '20min'
|
||||||
_VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?20min\.ch/
|
||||||
|
(?:
|
||||||
|
videotv/*\?.*?\bvid=|
|
||||||
|
videoplayer/videoplayer\.html\?.*?\bvideoId@
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# regular video
|
|
||||||
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
|
'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
|
||||||
'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
|
'md5': 'e7264320db31eed8c38364150c12496e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '469148',
|
'id': '469148',
|
||||||
'ext': 'flv',
|
|
||||||
'title': '85 000 Franken für 15 perfekte Minuten',
|
|
||||||
'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
|
|
||||||
'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# news article with video
|
|
||||||
'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
|
|
||||||
'md5': 'cd4cbb99b94130cff423e967cd275e5e',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '469408',
|
|
||||||
'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '«Wir müssen mutig nach vorne schauen»',
|
|
||||||
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
|
|
||||||
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
|
|
||||||
},
|
|
||||||
'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
|
|
||||||
}, {
|
|
||||||
# YouTube embed
|
|
||||||
'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
|
|
||||||
'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ivM7A7SpDOs',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
|
'title': '85 000 Franken für 15 perfekte Minuten',
|
||||||
'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
|
'thumbnail': r're:https?://.*\.jpg$',
|
||||||
'upload_date': '20160424',
|
},
|
||||||
'uploader': 'RTVCM Castilla-La Mancha',
|
}, {
|
||||||
'uploader_id': 'RTVCM',
|
'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '523629',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'So kommen Sie bei Eis und Schnee sicher an',
|
||||||
|
'description': 'md5:117c212f64b25e3d95747e5276863f7d',
|
||||||
|
'thumbnail': r're:https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
|
'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return [m.group('url') for m in re.finditer(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
display_id = mobj.group('display_id') or video_id
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
video = self._download_json(
|
||||||
|
'http://api.20min.ch/video/%s/show' % video_id,
|
||||||
|
video_id)['content']
|
||||||
|
|
||||||
youtube_url = self._html_search_regex(
|
title = video['title']
|
||||||
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
|
|
||||||
webpage, 'YouTube embed URL', default=None)
|
|
||||||
if youtube_url is not None:
|
|
||||||
return self.url_result(youtube_url, 'Youtube')
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
formats = [{
|
||||||
r'<h1>.*?<span>(.+?)</span></h1>',
|
'format_id': format_id,
|
||||||
webpage, 'title', default=None)
|
'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
|
||||||
if not title:
|
'quality': quality,
|
||||||
title = remove_end(re.sub(
|
} for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
|
||||||
r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
|
self._sort_formats(formats)
|
||||||
|
|
||||||
if not video_id:
|
description = video.get('lead')
|
||||||
video_id = self._search_regex(
|
thumbnail = video.get('thumbnail')
|
||||||
r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
|
|
||||||
|
|
||||||
description = self._html_search_meta(
|
def extract_count(kind):
|
||||||
'description', webpage, 'description')
|
return try_get(
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
video,
|
||||||
|
lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
|
||||||
|
|
||||||
|
like_count = extract_count('up')
|
||||||
|
dislike_count = extract_count('down')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
|
||||||
'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'like_count': like_count,
|
||||||
|
'dislike_count': dislike_count,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -84,12 +84,27 @@ class UOLIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
if not video_id.isdigit():
|
media_id = None
|
||||||
embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id)
|
|
||||||
video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id')
|
if video_id.isdigit():
|
||||||
|
media_id = video_id
|
||||||
|
|
||||||
|
if not media_id:
|
||||||
|
embed_page = self._download_webpage(
|
||||||
|
'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
|
||||||
|
video_id, 'Downloading embed page', fatal=False)
|
||||||
|
if embed_page:
|
||||||
|
media_id = self._search_regex(
|
||||||
|
(r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
|
||||||
|
embed_page, 'media id', default=None)
|
||||||
|
|
||||||
|
if not media_id:
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
|
||||||
|
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id,
|
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
|
||||||
video_id)['item']
|
media_id)['item']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
|
||||||
query = {
|
query = {
|
||||||
@ -118,7 +133,7 @@ class UOLIE(InfoExtractor):
|
|||||||
tags.append(tag_description)
|
tags.append(tag_description)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': clean_html(video_data.get('desMedia')),
|
'description': clean_html(video_data.get('desMedia')),
|
||||||
'thumbnail': video_data.get('thumbnail'),
|
'thumbnail': video_data.get('thumbnail'),
|
||||||
|
@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
|
def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None):
|
||||||
def num_to_hex(n):
|
def num_to_hex(n):
|
||||||
return hex(n)[2:]
|
return hex(n)[2:]
|
||||||
|
@ -206,7 +206,7 @@ class VevoIE(VevoBaseIE):
|
|||||||
note='Retrieving oauth token',
|
note='Retrieving oauth token',
|
||||||
errnote='Unable to retrieve oauth token')
|
errnote='Unable to retrieve oauth token')
|
||||||
|
|
||||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
'%s said: This page is currently unavailable in your region' % self.IE_NAME)
|
'%s said: This page is currently unavailable in your region' % self.IE_NAME)
|
||||||
|
|
||||||
|
@ -254,7 +254,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'uploader_id': 'user18948128',
|
'uploader_id': 'user18948128',
|
||||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||||
'duration': 10,
|
'duration': 10,
|
||||||
'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.',
|
'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'videopassword': 'youtube-dl',
|
'videopassword': 'youtube-dl',
|
||||||
@ -306,7 +306,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
{
|
{
|
||||||
# contains original format
|
# contains original format
|
||||||
'url': 'https://vimeo.com/33951933',
|
'url': 'https://vimeo.com/33951933',
|
||||||
'md5': '2d9f5475e0537f013d0073e812ab89e6',
|
'md5': '53c688fa95a55bf4b7293d37a89c5c53',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '33951933',
|
'id': '33951933',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -324,7 +324,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'url': 'https://vimeo.com/channels/tributes/6213729',
|
'url': 'https://vimeo.com/channels/tributes/6213729',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6213729',
|
'id': '6213729',
|
||||||
'ext': 'mp4',
|
'ext': 'mov',
|
||||||
'title': 'Vimeo Tribute: The Shining',
|
'title': 'Vimeo Tribute: The Shining',
|
||||||
'uploader': 'Casey Donahue',
|
'uploader': 'Casey Donahue',
|
||||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
|
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
|
||||||
@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'expected_warnings': ['Unable to download JSON metadata'],
|
'expected_warnings': ['Unable to download JSON metadata'],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# redirects to ondemand extractor and should be passed throught it
|
# redirects to ondemand extractor and should be passed through it
|
||||||
# for successful extraction
|
# for successful extraction
|
||||||
'url': 'https://vimeo.com/73445910',
|
'url': 'https://vimeo.com/73445910',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -629,6 +629,9 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
|
|||||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
|
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
|
||||||
'uploader_id': 'gumfilms',
|
'uploader_id': 'gumfilms',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'best[protocol=https]',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# requires Referer to be passed along with og:video:url
|
# requires Referer to be passed along with og:video:url
|
||||||
'url': 'https://vimeo.com/ondemand/36938/126682985',
|
'url': 'https://vimeo.com/ondemand/36938/126682985',
|
||||||
@ -727,12 +730,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
|||||||
# Try extracting href first since not all videos are available via
|
# Try extracting href first since not all videos are available via
|
||||||
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
|
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
|
||||||
clips = re.findall(
|
clips = re.findall(
|
||||||
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
|
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
|
||||||
if clips:
|
if clips:
|
||||||
for video_id, video_url in clips:
|
for video_id, video_url, video_title in clips:
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
compat_urlparse.urljoin(base_url, video_url),
|
compat_urlparse.urljoin(base_url, video_url),
|
||||||
VimeoIE.ie_key(), video_id=video_id)
|
VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
|
||||||
# More relaxed fallback
|
# More relaxed fallback
|
||||||
else:
|
else:
|
||||||
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
|
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
|
||||||
|
@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor):
|
|||||||
return webpage
|
return webpage
|
||||||
|
|
||||||
def _extract_track(self, track, track_id=None):
|
def _extract_track(self, track, track_id=None):
|
||||||
title = track['title']
|
track_name = track.get('songName') or track.get('name') or track['subName']
|
||||||
|
artist = track.get('artist') or track.get('artist_name') or track.get('singers')
|
||||||
|
title = '%s - %s' % (artist, track_name) if artist else track_name
|
||||||
track_url = self._decrypt(track['location'])
|
track_url = self._decrypt(track['location'])
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor):
|
|||||||
'thumbnail': track.get('pic') or track.get('album_pic'),
|
'thumbnail': track.get('pic') or track.get('album_pic'),
|
||||||
'duration': int_or_none(track.get('length')),
|
'duration': int_or_none(track.get('length')),
|
||||||
'creator': track.get('artist', '').split(';')[0],
|
'creator': track.get('artist', '').split(';')[0],
|
||||||
'track': title,
|
'track': track_name,
|
||||||
'album': track.get('album_name'),
|
'track_number': int_or_none(track.get('track')),
|
||||||
'artist': track.get('artist'),
|
'album': track.get('album_name') or track.get('title'),
|
||||||
|
'artist': artist,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor):
|
|||||||
class XiamiSongIE(XiamiBaseIE):
|
class XiamiSongIE(XiamiBaseIE):
|
||||||
IE_NAME = 'xiami:song'
|
IE_NAME = 'xiami:song'
|
||||||
IE_DESC = '虾米音乐'
|
IE_DESC = '虾米音乐'
|
||||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.xiami.com/song/1775610518',
|
'url': 'http://www.xiami.com/song/1775610518',
|
||||||
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
|
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1775610518',
|
'id': '1775610518',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Woman',
|
'title': 'HONNE - Woman',
|
||||||
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||||
'duration': 265,
|
'duration': 265,
|
||||||
'creator': 'HONNE',
|
'creator': 'HONNE',
|
||||||
@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1775256504',
|
'id': '1775256504',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '悟空',
|
'title': '戴荃 - 悟空',
|
||||||
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||||
'duration': 200,
|
'duration': 200,
|
||||||
'creator': '戴荃',
|
'creator': '戴荃',
|
||||||
@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
'skip': 'Georestricted',
|
'skip': 'Georestricted',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.xiami.com/song/1775953850',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1775953850',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'До Скону - Чума Пожирает Землю',
|
||||||
|
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||||
|
'duration': 683,
|
||||||
|
'creator': 'До Скону',
|
||||||
|
'track': 'Чума Пожирает Землю',
|
||||||
|
'track_number': 7,
|
||||||
|
'album': 'Ад',
|
||||||
|
'artist': 'До Скону',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.xiami.com/song/xLHGwgd07a1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE):
|
|||||||
class XiamiAlbumIE(XiamiPlaylistBaseIE):
|
class XiamiAlbumIE(XiamiPlaylistBaseIE):
|
||||||
IE_NAME = 'xiami:album'
|
IE_NAME = 'xiami:album'
|
||||||
IE_DESC = '虾米音乐 - 专辑'
|
IE_DESC = '虾米音乐 - 专辑'
|
||||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[^/?#&]+)'
|
||||||
_TYPE = '1'
|
_TYPE = '1'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.xiami.com/album/2100300444',
|
'url': 'http://www.xiami.com/album/2100300444',
|
||||||
@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
|
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.xiami.com/album/URVDji2a506',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class XiamiArtistIE(XiamiPlaylistBaseIE):
|
class XiamiArtistIE(XiamiPlaylistBaseIE):
|
||||||
IE_NAME = 'xiami:artist'
|
IE_NAME = 'xiami:artist'
|
||||||
IE_DESC = '虾米音乐 - 歌手'
|
IE_DESC = '虾米音乐 - 歌手'
|
||||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[^/?#&]+)'
|
||||||
_TYPE = '2'
|
_TYPE = '2'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
|
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2132',
|
'id': '2132',
|
||||||
},
|
},
|
||||||
'playlist_count': 20,
|
'playlist_count': 20,
|
||||||
'skip': 'Georestricted',
|
'skip': 'Georestricted',
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
class XiamiCollectionIE(XiamiPlaylistBaseIE):
|
class XiamiCollectionIE(XiamiPlaylistBaseIE):
|
||||||
IE_NAME = 'xiami:collection'
|
IE_NAME = 'xiami:collection'
|
||||||
IE_DESC = '虾米音乐 - 精选集'
|
IE_DESC = '虾米音乐 - 精选集'
|
||||||
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[^/?#&]+)'
|
||||||
_TYPE = '3'
|
_TYPE = '3'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
|
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
|
||||||
|
@ -2,44 +2,37 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import urljoin
|
||||||
|
|
||||||
|
|
||||||
class YourUploadIE(InfoExtractor):
|
class YourUploadIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
_VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
|
||||||
(?:yourupload\.com/watch|
|
_TESTS = [{
|
||||||
embed\.yourupload\.com|
|
'url': 'http://yourupload.com/watch/14i14h',
|
||||||
embed\.yucache\.net
|
'md5': '5e2c63385454c557f97c4c4131a393cd',
|
||||||
)/(?P<id>[A-Za-z0-9]+)
|
'info_dict': {
|
||||||
'''
|
'id': '14i14h',
|
||||||
_TESTS = [
|
'ext': 'mp4',
|
||||||
{
|
'title': 'BigBuckBunny_320x180.mp4',
|
||||||
'url': 'http://yourupload.com/watch/14i14h',
|
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
'md5': '5e2c63385454c557f97c4c4131a393cd',
|
}
|
||||||
'info_dict': {
|
}, {
|
||||||
'id': '14i14h',
|
'url': 'http://www.yourupload.com/embed/14i14h',
|
||||||
'ext': 'mp4',
|
'only_matching': True,
|
||||||
'title': 'BigBuckBunny_320x180.mp4',
|
}, {
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
'url': 'http://embed.yourupload.com/14i14h',
|
||||||
}
|
'only_matching': True,
|
||||||
},
|
}]
|
||||||
{
|
|
||||||
'url': 'http://embed.yourupload.com/14i14h',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://embed.yucache.net/14i14h?client_file_id=803349',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
embed_url = 'http://embed.yucache.net/{0:}'.format(video_id)
|
embed_url = 'http://www.yourupload.com/embed/%s' % video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
video_url = self._og_search_video_url(webpage)
|
video_url = urljoin(embed_url, self._og_search_video_url(webpage))
|
||||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -316,6 +316,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
|
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||||
@ -862,6 +863,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# YouTube Red video with episode data
|
||||||
|
'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'iqKdEhx-dD4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Isolation - Mind Field (Ep 1)',
|
||||||
|
'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822',
|
||||||
|
'upload_date': '20170118',
|
||||||
|
'uploader': 'Vsauce',
|
||||||
|
'uploader_id': 'Vsauce',
|
||||||
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
|
||||||
|
'license': 'Standard YouTube License',
|
||||||
|
'series': 'Mind Field',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'Skipping DASH manifest',
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# itag 212
|
||||||
|
'url': '1t24XAntNCY',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -1448,6 +1478,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
video_alt_title = video_creator = None
|
video_alt_title = video_creator = None
|
||||||
|
|
||||||
|
m_episode = re.search(
|
||||||
|
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
||||||
|
video_webpage)
|
||||||
|
if m_episode:
|
||||||
|
series = m_episode.group('series')
|
||||||
|
season_number = int(m_episode.group('season'))
|
||||||
|
episode_number = int(m_episode.group('episode'))
|
||||||
|
else:
|
||||||
|
series = season_number = episode_number = None
|
||||||
|
|
||||||
m_cat_container = self._search_regex(
|
m_cat_container = self._search_regex(
|
||||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||||
video_webpage, 'categories', default=None)
|
video_webpage, 'categories', default=None)
|
||||||
@ -1737,6 +1777,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'start_time': start_time,
|
'start_time': start_time,
|
||||||
'end_time': end_time,
|
'end_time': end_time,
|
||||||
|
'series': series,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1813,6 +1856,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
'title': 'YDL_Empty_List',
|
'title': 'YDL_Empty_List',
|
||||||
},
|
},
|
||||||
'playlist_count': 0,
|
'playlist_count': 0,
|
||||||
|
'skip': 'This playlist is private',
|
||||||
}, {
|
}, {
|
||||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||||
@ -1844,6 +1888,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
|
'skip': 'This playlist is private',
|
||||||
}, {
|
}, {
|
||||||
'note': 'embedded',
|
'note': 'embedded',
|
||||||
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
||||||
@ -1955,14 +2000,18 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
url = self._TEMPLATE_URL % playlist_id
|
url = self._TEMPLATE_URL % playlist_id
|
||||||
page = self._download_webpage(url, playlist_id)
|
page = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
|
# the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
|
||||||
|
for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
|
||||||
match = match.strip()
|
match = match.strip()
|
||||||
# Check if the playlist exists or is private
|
# Check if the playlist exists or is private
|
||||||
if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match):
|
mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
|
||||||
raise ExtractorError(
|
if mobj:
|
||||||
'The playlist doesn\'t exist or is private, use --username or '
|
reason = mobj.group('reason')
|
||||||
'--netrc to access it.',
|
message = 'This playlist %s' % reason
|
||||||
expected=True)
|
if 'private' in reason:
|
||||||
|
message += ', use --username or --netrc to access it'
|
||||||
|
message += '.'
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
elif re.match(r'[^<]*Invalid parameters[^<]*', match):
|
elif re.match(r'[^<]*Invalid parameters[^<]*', match):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Invalid parameters. Maybe URL is incorrect.',
|
'Invalid parameters. Maybe URL is incorrect.',
|
||||||
|
@ -213,7 +213,7 @@ class JSInterpreter(object):
|
|||||||
def extract_object(self, objname):
|
def extract_object(self, objname):
|
||||||
obj = {}
|
obj = {}
|
||||||
obj_m = re.search(
|
obj_m = re.search(
|
||||||
(r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
|
(r'(?<!this\.)%s\s*=\s*\{' % re.escape(objname)) +
|
||||||
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
|
r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
|
||||||
r'\}\s*;',
|
r'\}\s*;',
|
||||||
self.code)
|
self.code)
|
||||||
|
@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
||||||
help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default')
|
help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x')
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--audio-quality', metavar='QUALITY',
|
'--audio-quality', metavar='QUALITY',
|
||||||
dest='audioquality', default='5',
|
dest='audioquality', default='5',
|
||||||
@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
if '--ignore-config' not in system_conf:
|
if '--ignore-config' not in system_conf:
|
||||||
user_conf = _readUserConf()
|
user_conf = _readUserConf()
|
||||||
|
|
||||||
argv = system_conf + user_conf + command_line_conf
|
argv = system_conf + user_conf + custom_conf + command_line_conf
|
||||||
opts, args = parser.parse_args(argv)
|
opts, args = parser.parse_args(argv)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
for conf_label, conf in (
|
for conf_label, conf in (
|
||||||
|
@ -128,7 +128,13 @@ DATE_FORMATS = (
|
|||||||
'%d %B %Y',
|
'%d %B %Y',
|
||||||
'%d %b %Y',
|
'%d %b %Y',
|
||||||
'%B %d %Y',
|
'%B %d %Y',
|
||||||
|
'%B %dst %Y',
|
||||||
|
'%B %dnd %Y',
|
||||||
|
'%B %dth %Y',
|
||||||
'%b %d %Y',
|
'%b %d %Y',
|
||||||
|
'%b %dst %Y',
|
||||||
|
'%b %dnd %Y',
|
||||||
|
'%b %dth %Y',
|
||||||
'%b %dst %Y %I:%M',
|
'%b %dst %Y %I:%M',
|
||||||
'%b %dnd %Y %I:%M',
|
'%b %dnd %Y %I:%M',
|
||||||
'%b %dth %Y %I:%M',
|
'%b %dth %Y %I:%M',
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.01.05'
|
__version__ = '2017.01.22'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user